ICU-7611 delete old copy of Mark's Java unicodetools from before he moved them to the unicode.org repository

X-SVN-Rev: 27924
2010-04-09 23:40:43 +00:00 · 2010-04-09 23:40:43 +00:00 · a7c0c94a15
commit a7c0c94a15
parent baed720ac1
167 changed files with 0 additions and 461201 deletions
--- a/tools/unicodetools/com/ibm/text/TestICU4J.java
+++ b/tools/unicodetools/com/ibm/text/TestICU4J.java
@ -1,392 +0,0 @@
 package com.ibm.text;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.Normalizer;
 import com.ibm.icu.lang.UCharacter;
 import java.util.BitSet;
 import java.util.Set;
 import java.util.HashSet;
 import java.util.TreeSet;
 import java.util.Iterator;
 import java.text.NumberFormat;
 import com.ibm.text.utility.FastIntBinarySearch;
 public class TestICU4J {
  /**
   * Smoke-tests a couple of ICU4J calls, then runs the set speed comparison
   * for two general categories.
   */
  public static void main(String[] args) {
        // Built but never printed; kept so the call itself is still exercised.
        String supplementary = UTF16.valueOf(0x10000);

        String composed = Normalizer.normalize("a\u0308", Normalizer.NFC);
        System.out.println(composed);

    /*
    System.out.println(UCharacter.getType(0x10FFFF));
    System.out.println(UCharacter.getName(0x61));
    */

        // A small category with many iterations, then a large one with a single pass.
        testUnicodeSetSpeed(Character.TITLECASE_LETTER, 100);
        testUnicodeSetSpeed(Character.UNASSIGNED, 1);
  }
  // When true, testUnicodeSetSpeed prints every code point whose "contains"
  // answer disagrees with the reference BitSet.
  static final boolean SHOW_ERRORS = false;
  // When true, optimizedAdd/optimizedDone coalesce consecutive code points into
  // ranges before adding them; when false each code point is added individually.
  static boolean OPTIMIZATION = true;
  /**
   * Times add, contains and iteration on a TreeSet of Integer against the same
   * operations on a UnicodeSet (plus a binary-search lookup built from the
   * UnicodeSet), printing the per-iteration time of each and the ratio to the
   * TreeSet baseline. Finishes by printing every range of the UnicodeSet.
   *
   * @param prop general category value, compared against UCharacter.getType(cp)
   * @param ITERATIONS number of times each timed loop is repeated; reported
   *        times are divided by this value
   */
  static void testUnicodeSetSpeed(int prop, int ITERATIONS) {
        NumberFormat numb = NumberFormat.getNumberInstance();
        NumberFormat percent = NumberFormat.getPercentInstance();
        double start, delta, oldDelta;
        // Accumulates iterated values so the iteration loops have an observable result.
        int temp = 0;
        Set s;
        UnicodeSet us;
        Iterator it;
        UnicodeSetIterator uit;
        BitSet bs = new BitSet();

        // Build the reference bitmap of code points having the requested type.
        System.out.println();
        System.out.println("Getting characters for property " + prop);
        int total = 0;
        // Inclusive upper bound: U+10FFFF is a valid code point and every later
        // loop in this method also covers it.
        for (int cp = 0; cp <= 0x10FFFF; ++cp) {
            if (UCharacter.getType(cp) == prop) {
                bs.set(cp);
                ++total;
            }
        }
        System.out.println("Total characters: " + numb.format(total));
        System.out.println("Loop Iterations: " + numb.format(ITERATIONS));
        System.out.println();

        // ---- add ----
        System.out.println("Testing Add speed");
        s = new TreeSet();
        start = System.currentTimeMillis();
        for (int i = 0; i < ITERATIONS; ++i) {
            s.clear();
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                if (bs.get(cp)) {
                    s.add(new Integer(cp));
                }
            }
        }
        oldDelta = delta = (System.currentTimeMillis() - start)/ITERATIONS;
        System.out.println("Set add time: " + numb.format(delta));
        System.out.println("Total characters: " + numb.format(s.size()));

        us = new UnicodeSet();
        start = System.currentTimeMillis();
        for (int i = 0; i < ITERATIONS; ++i) {
            us.clear();
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                if (bs.get(cp)) {
                    optimizedAdd(us,cp);
                }
            }
        }
        // Flush the range still pending in optimizedAdd's static state.
        optimizedDone(us);
        delta = (System.currentTimeMillis() - start)/ITERATIONS;
        System.out.println("UnicodeSet add time: " + numb.format(delta) + ", " + percent.format(delta/oldDelta));
        System.out.println("Total characters: " + numb.format(us.size()) + ", ranges: " + us.getRangeCount());
        System.out.println();

        // ---- contains ----
        System.out.println("Testing Contains speed");
        start = System.currentTimeMillis();
        for (int i = 0; i < ITERATIONS; ++i) {
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                if (s.contains(new Integer(cp)) != bs.get(cp)) {
                    if (SHOW_ERRORS) System.out.println("Error at: " + info(cp));
                }
            }
        }
        oldDelta = delta = (System.currentTimeMillis() - start)/ITERATIONS;
        System.out.println("Set contains time: " + numb.format(delta));

        start = System.currentTimeMillis();
        for (int i = 0; i < ITERATIONS; ++i) {
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                if (us.contains(cp) != bs.get(cp)) {
                    if (SHOW_ERRORS) System.out.println("Error at: " + info(cp));
                }
            }
        }
        delta = (System.currentTimeMillis() - start)/ITERATIONS;
        System.out.println("UnicodeSet contains time: " + numb.format(delta) + ", " + percent.format(delta/oldDelta));

        setupBinary(us);
        start = System.currentTimeMillis();
        for (int i = 0; i < ITERATIONS; ++i) {
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                if (binaryContains(cp) != bs.get(cp)) {
                    if (SHOW_ERRORS) System.out.println("Error at: " + info(cp));
                }
            }
        }
        delta = (System.currentTimeMillis() - start)/ITERATIONS;
        System.out.println("BINARY UnicodeSet contains time: " + numb.format(delta) + ", " + percent.format(delta/oldDelta));

        // ---- iteration ----
        System.out.println("Testing Iteration speed");
        start = System.currentTimeMillis();
        for (int i = 0; i < ITERATIONS; ++i) {
            it = s.iterator();
            while (it.hasNext()) {
                temp += ((Integer)it.next()).intValue();
            }
        }
        oldDelta = delta = (System.currentTimeMillis() - start)/ITERATIONS;
        System.out.println("Set iteration time: " + numb.format(delta));

        uit = new UnicodeSetIterator(us);
        start = System.currentTimeMillis();
        for (int i = 0; i < ITERATIONS; ++i) {
            uit.reset();
            while (uit.next()) {
                temp += uit.codepoint;
            }
        }
        delta = (System.currentTimeMillis() - start)/ITERATIONS;
        System.out.println("UnicodeSet iteration time: " + numb.format(delta) + ", " + percent.format(delta/oldDelta));

        // Untimed: dump every range of the resulting UnicodeSet.
        uit.reset();
        while (uit.nextRange()) {
            System.out.println(info(uit.codepoint, uit.codepointEnd));
        }
  }
  // Search structure over the range boundaries of a UnicodeSet; assigned by
  // setupBinary() and queried by binaryContains().
  static FastIntBinarySearch fibs;
  /**
   * Flattens the ranges of {@code us} into an int array of the form
   * {start0, end0+1, start1, end1+1, ...} and stores a FastIntBinarySearch
   * over that array in {@link #fibs}.
   */
  static void setupBinary(UnicodeSet us) {
    int[] boundaries = new int[2 * us.getRangeCount()];
    int next = 0;
    for (UnicodeSetIterator ranges = new UnicodeSetIterator(us); ranges.nextRange(); ) {
        boundaries[next] = ranges.codepoint;            // inclusive start
        boundaries[next + 1] = ranges.codepointEnd + 1; // exclusive limit
        next += 2;
    }
    fibs = new FastIntBinarySearch(boundaries);
  }
  /**
   * Looks {@code cp} up in the structure built by setupBinary().
   *
   * @return true when the index reported by {@code fibs.findIndex(cp)} is odd
   */
  static boolean binaryContains(int cp) {
    boolean oddIndex = (fibs.findIndex(cp) & 1) == 1;
    return oddIndex;
  }
  /**
   * Formats a code point as upper-case hex followed by a space and its
   * character name from UCharacter.getName.
   */
  static String info(int cp) {
    String hex = Integer.toString(cp, 16).toUpperCase();
    return hex + " " + UCharacter.getName(cp);
  }
  /**
   * Formats an inclusive code point range as "START..END nameStart..nameEnd",
   * or as a single "HEX name" entry when both ends are the same code point.
   */
  static String info(int cpStart, int cpEnd) {
    String startHex = Integer.toString(cpStart, 16).toUpperCase();
    String startName = UCharacter.getName(cpStart);
    if (cpStart == cpEnd) {
        return startHex + " " + startName;
    }
    String endHex = Integer.toString(cpEnd, 16).toUpperCase();
    return startHex + ".." + endHex
        + " " + startName + ".." + UCharacter.getName(cpEnd);
  }
  // Pending range accumulated by optimizedAdd(): [first, limit - 1].
  static int first;
  // One past the last code point of the pending range; -2 means "nothing pending"
  // (optimizedDone() resets it to this value).
  static int limit = -2;
  /**
   * Adds {@code cp} to {@code us}. With OPTIMIZATION on, consecutive code
   * points are collected in the static first/limit pair and only written to
   * the set as a whole range once a non-adjacent code point arrives; the last
   * pending range must be flushed with optimizedDone().
   */
  static void optimizedAdd(UnicodeSet us, int cp) {
    if (OPTIMIZATION) {
        if (cp != limit) {
            // Gap: emit whatever range was pending, then start a new one at cp.
            if (limit > 0) {
                us.add(first, limit - 1);
                // System.out.println(info(first, limit-1));
            }
            first = cp;
            limit = cp + 1;
        } else {
            // cp directly extends the pending range.
            limit++;
        }
    } else {
        us.add(cp);
    }
  }
  /**
   * Flushes the range left pending by optimizedAdd() into {@code us} and
   * resets the pending state. Does nothing when OPTIMIZATION is off.
   */
  static void optimizedDone(UnicodeSet us) {
    if (OPTIMIZATION) {
        if (limit > 0) {
            us.add(first, limit - 1);
            //System.out.println(info(first, limit-1));
        }
        limit = -2; // reset to invalid
    }
  }
  /**
   * Provides an interface for properties in
   * http://www.unicode.org/Public/UNIDATA/PropertyAliases.txt
   * and their values in
   * http://www.unicode.org/Public/UNIDATA/PropertyValueAliases.txt
   *
   * NOTE(review): every lookup method below is an unimplemented stub (returns
   * false, 0, "" or does nothing); this looks like an API sketch rather than
   * working code.
   */
  public static class UXCharacter {
 	/**
 	 * Tests a particular code point to see if the cited property has the given value.
 	 * Currently a stub that always returns false.
 	 *
 	 * Sample: the following are equivalent
 	 * <pre>
 	 *		if (UCharacter.test("LB", "AL", cp)) ...
 	 *		if (UCharacter.test("line break", "alphabetic", cp)) ...
 	 * </pre>
 	 *
 	 * @param propertyName the cited property
 	 * @param propertyValue the value to test for
 	 * @param codePoint the particular code point
 	 */
 	public static boolean test(String propertyName, String propertyValue, int codePoint) {
 		return false;
 	}
 	/**
 	 * Produces a UnicodeSet of code points that have the given propertyvalue for the given property.
 	 * @param propertyName the cited property
 	 * @param propertyValue the value to collect
 	 * @param set the resulting value. The set is cleared, 
 	 * then all the code points with the given <property, value> are added. 
 	 *
 	 * Sample:
 	 * <pre>
 	 *		UnicodeSet set = new UnicodeSet();
 	 *		getSet("LB", "AL", set);
 	 * </pre>
 	 *
 	 */
 	public static void getSet(String propertyName, String propertyValue, UnicodeSet set) {
 		// logical implementation. Real implementation would be way faster!
 		set.clear();
 		for (int cp = 0; cp <= 0x10FFFF; ++cp) {
 			if (test(propertyName, propertyValue, cp)) set.add(cp);
 		}
 	}
 	// ======================================================
 	// POSSIBLE ADDITIONAL UTILITIES FOR CONVENIENCE OR SPEED
 	// ======================================================
 	/**
 	 * Tests a particular code point to see if the cited boolean property is true.
 	 * Delegates to test(booleanPropertyName, "true", codePoint).
 	 * @param booleanPropertyName the cited property
 	 * @param codePoint the particular code point
 	 * @return true if the cited property has the value "true" for the specified code point.
 	 *
 	 * Sample: the following are equivalent
 	 * <pre>
 	 *		if (UCharacter.test("WSpace", cp)) ...
 	 *		if (UCharacter.test("White_Space", cp)) ...
 	 *		if (UCharacter.test("White_Space", "true", cp)) ...
 	 *		if (!UCharacter.test("White_Space", "false", cp)) ...
 	 * </pre>
 	 *
 	 */
 	public static boolean test(String booleanPropertyName, int codePoint) {
 		return test(booleanPropertyName, "true", codePoint);
 	}
 	// ===============================================
 	// The following allow access to properties by number, saving a string lookup
 	// on each call.
 	// ===============================================
 	/**
 	 * Gets an index for higher-speed access to properties.
 	 * Currently a stub that always returns 0.
 	 *
 	 * Sample:
 	 * <pre>
 	 *		int prop = UCharacter.getPropertyIndex("LB");
 	 *		int value = UCharacter.getValueIndex("LB", "AL");
 	 *		while (true) {
 	 *		...
 	 *		if (test(prop, value, codePoint)) ...
 	 * </pre>
 	 *
 	 */
 	public static int getPropertyIndex(String propertyName) {
 		return 0;
 	}
 	/**
 	 * Gets maximum property index, used for iterating through properties.
 	 * Currently a stub that always returns 0.
 	 *
 	 */
 	public static int getMaxPropertyIndex() {
 		return 0;
 	}
 	// Name styles accepted by getPropertyName() and getValueName().
 	static final byte // NAME_STYLE
 		SHORT = 0,
 		DEFAULT = 1,
 		LONG = 2;
 	/**
 	 * Gets property name.
 	 * Currently a stub that always returns the empty string.
 	 *
 	 * @param propertyIndex index of the property
 	 * @param namestyle one of SHORT, DEFAULT, LONG
 	 */
 	public static String getPropertyName(int propertyIndex, byte namestyle) {
 		return "";
 	}
 	/**
 	 * Tests a particular code point to see if the cited property has the given value.
 	 * Currently a stub that always returns false.
 	 */
 	public static boolean test(int propertyIndex, String propertyValue, int codePoint) {
 		return false;
 	}
 	/**
 	 * Produces a UnicodeSet of code points that have the given propertyvalue for the given property.
 	 * Currently a stub that leaves the set untouched.
 	 */
 	public static void getSet(int propertyIndex, String propertyValue, UnicodeSet set) {
 	}
 	// ===============================================
 	// The following allow access to enumerated property values by number,
 	// saving a string lookup on each call.
 	// They are only valid for enumerated properties
 	// including the combining character class (0..255).
 	// ===============================================
 	/**
 	 * Gets an index for higher-speed access to property values.
 	 * Only valid for enumerated properties.
 	 * Currently a stub that always returns 0.
 	 */
 	public static int getValueIndex(String propertyName, String propertyValue) {
 		return 0;
 	}
 	/**
 	 * Gets maximum value index for a given property, used for iterating through property values.
 	 * Only valid for enumerated properties.
 	 * Currently a stub that always returns 0.
 	 *
 	 */
 	public static int getMaxValueIndex(int propertyIndex) {
 		return 0;
 	}
 	/**
 	 * Gets property value name, corresponding to one of the value indexes passed in.
 	 * Currently a stub that always returns the empty string.
 	 *
 	 * @param propertyIndex index of the property
 	 * @param valueIndex index of the value within that property
 	 * @param namestyle one of SHORT, DEFAULT, LONG
 	 */
 	public static String getValueName(int propertyIndex, int valueIndex, byte namestyle) {
 		return "";
 	}
 	/**
 	 * Tests a particular code point to see if the cited property has the given value.
 	 * Currently a stub that always returns false.
 	 */
 	public static boolean test(int propertyIndex, int valueIndex, int codePoint) {
 		return false;
 	}
 	/**
 	 * Produces a UnicodeSet of code points that have the given propertyvalue for the given property.
 	 * Currently a stub that leaves the set untouched.
 	 */
 	public static void getSet(int propertyIndex, int valueIndex, UnicodeSet set) {
 	}
 /* OPEN ISSUES:
 - Don't like the names of the functions. Any better options? test => hasValue? hasPropertyValue?
 - Should getSet really ADD to the set (avoiding the clear?) and be called addProperties?
 Maybe faster sometimes, but might also be more error-prone.
 */
  }
 }
--- a/tools/unicodetools/com/ibm/text/UCA/AbbreviatedUnicodeSetIterator.java
+++ b/tools/unicodetools/com/ibm/text/UCA/AbbreviatedUnicodeSetIterator.java
@ -1,66 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/AbbreviatedUnicodeSetIterator.java,v $ 
 * $Date: 2004/02/06 18:32:04 $ 
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCA;
 import java.util.*;
 import java.io.BufferedReader;
 import java.io.Reader;
 import java.io.PrintWriter;
 import java.io.FileReader;
 import java.text.MessageFormat;
 import java.io.IOException;
 import com.ibm.text.UCD.Normalizer;
 import com.ibm.text.UCD.UCD;
 import com.ibm.text.utility.*;
 import com.ibm.text.UCD.UnifiedBinaryProperty;
 import com.ibm.text.UCD.UCDProperty;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 /**
  * A UnicodeSetIterator that can optionally truncate every range it loads, so
  * that a large set can be sampled instead of walked in full.
  *
  * When abbreviation is on, each loaded range has its endElement capped at
  * nextElement + perRange, where perRange is the requested density divided
  * (integer division) by the number of ranges in the set.
  */
 public class AbbreviatedUnicodeSetIterator extends UnicodeSetIterator {
    // Whether loadRange() truncates ranges.
    private boolean abbreviated;
    // Maximum distance between the first and last element kept from a range.
    private int perRange;

    /** Creates an iterator with abbreviation turned off. */
    public AbbreviatedUnicodeSetIterator() {
        super();
        abbreviated = false;
    }

    /** Resets to iterate over {@code newSet} in full (no abbreviation). */
    public void reset(UnicodeSet newSet) {
        reset(newSet, false);
    }

    /**
     * Resets to iterate over {@code newSet}, with a default density of 100.
     *
     * @param abb true to truncate each range
     */
    public void reset(UnicodeSet newSet, boolean abb) {
        reset(newSet, abb, 100);
    }

    /**
     * Resets to iterate over {@code newSet}.
     *
     * @param newSet the set to iterate
     * @param abb true to truncate each range
     * @param density budget shared across all ranges; each range keeps at most
     *        density / rangeCount elements past its first one
     */
    public void reset(UnicodeSet newSet, boolean abb, int density) {
        // Configure the abbreviation state BEFORE delegating to the superclass.
        // NOTE(review): ICU's UnicodeSetIterator.reset() appears to load the
        // first range through loadRange(); if so, setting these fields after
        // super.reset() would clamp the first range with the previous
        // settings. Setting them first is harmless either way — confirm.
        abbreviated = abb;
        int rangeCount = newSet.getRangeCount();
        perRange = (rangeCount == 0) ? 0 : density / rangeCount;
        super.reset(newSet);
    }

    /** Loads the range as usual, then caps its end when abbreviation is on. */
    protected void loadRange(int myRange) {
        super.loadRange(myRange);
        if (abbreviated && (endElement > nextElement + perRange)) {
            endElement = nextElement + perRange;
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCA/CEList.java
+++ b/tools/unicodetools/com/ibm/text/UCA/CEList.java
@ -1,256 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/CEList.java,v $ 
 * $Date: 2002/07/03 02:15:47 $ 
 * $Revision: 1.6 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCA;
 import com.ibm.text.UCD.*;
 import com.ibm.text.utility.*;
 public final class CEList implements java.lang.Comparable, UCD_Types {
  // Backing array; may be shared with other CEList views (see sub/start/end).
  int[] contents;
  // Index in contents of the first element of this list.
  int startOffset;
  // Index in contents one past the last element of this list.
  int endOffset;
  // Number of elements; set to end - start by every constructor.
  int count;
  public CEList (int[] source, int start, int end) {
    count = end-start;
    contents = new int[count];
    System.arraycopy(source, start, contents, 0, count);
    startOffset = 0;
    endOffset = count;
  }
  /** Creates a list holding a private copy of the whole of {@code source}. */
  public CEList(int[] source) {
    this(source, 0, source.length);
  }
  /**
   * Creates a view over {@code source[start..end)} without copying; the array
   * is shared with the caller.
   *
   * @param spare unused; only distinguishes this signature from the copying
   *        constructor
   */
  private CEList(int[] source, int start, int end, boolean spare) {
    startOffset = start;
    endOffset = end;
    count = end - start;
    contents = source;
  }
  /**
   * Returns a new list made of this list's elements followed by those of
   * {@code that}. Neither operand is modified.
   */
  public CEList append(CEList that) {
    int total = count + that.count;
    int[] merged = new int[total];
    System.arraycopy(contents, startOffset, merged, 0, count);
    System.arraycopy(that.contents, that.startOffset, merged, count, that.count);
    // The freshly built array is owned by the result, so no further copy is needed.
    return new CEList(merged, 0, total, true);
  }
  /**
   * Returns a view of {@code contents[start..end)} that shares this list's
   * backing array (no copy).
   * NOTE(review): start and end are used as positions in the backing array and
   * are not offset by startOffset, so on a list that is itself a view they are
   * not relative to that view — confirm this is intended. No bounds checking
   * is done here.
   */
  public CEList sub(int start, int end) {
    return new CEList(contents, start, end, true);
  }
  /**
   * Returns a view of {@code contents[0..end)} that shares this list's backing
   * array (no copy).
   * NOTE(review): the view begins at index 0 of the backing array, not at
   * startOffset, so on a list that is itself a view it can expose elements
   * before that view — confirm this is intended.
   */
  public CEList start(int end) {
    return new CEList(contents, 0, end, true);
  }
  /**
   * Returns a view of {@code contents[start..contents.length)} that shares
   * this list's backing array (no copy).
   * NOTE(review): the view runs to the end of the backing array, not to
   * endOffset, so on a list that is itself a view it can expose elements past
   * that view — confirm this is intended.
   */
  public CEList end(int start) {
    return new CEList(contents, start, contents.length, true);
  }
  /** Returns the number of elements in this list. */
  public int length() {
    return count;
  }
  /**
   * Returns the element at position {@code i}.
   *
   * The index is a position in the backing array, the same coordinate space
   * that sub(int, int) accepts: valid values are startOffset .. endOffset - 1.
   * For lists built by the public constructors startOffset is 0, so this is
   * simply 0 .. length() - 1.
   *
   * The previous code rebased {@code i} by startOffset for the bounds check
   * and then read {@code contents[i]} with the rebased value, which for any
   * view with a non-zero startOffset returned an element outside the view.
   *
   * @throws ArrayIndexOutOfBoundsException if {@code i} is outside this list
   */
  public int at(int i) {
    int relative = i - startOffset;
    if (relative < 0 || relative >= count) throw new ArrayIndexOutOfBoundsException(i);
    return contents[i];
  }
  /**
   * Hash over the element count and the elements of this list's window, so
   * that views with the same elements hash alike wherever they sit in
   * their backing arrays.
   */
  public int hashCode() {
    int hash = count;
    int i = startOffset;
    while (i < endOffset) {
      hash = hash * 37 + contents[i];
      i++;
    }
    return hash;
  }
  /**
   * Two lists are equal when they hold the same elements in the same order,
   * regardless of where each one sits inside its backing array.
   *
   * @param other object to compare with; null or a non-CEList yields false
   */
  public boolean equals(Object other) {
    if (this == other) return true;
    // Explicit type test instead of relying on a caught ClassCastException /
    // NullPointerException, so genuine failures are no longer masked as "false".
    if (!(other instanceof CEList)) return false;
    CEList that = (CEList)other;
    if (count != that.count) return false;
    // Translates an index in this list's window to the matching index in that's.
    int delta = that.startOffset - startOffset;
    for (int i = startOffset; i < endOffset; ++i) {
      if (contents[i] != that.contents[i + delta]) return false;
    }
    return true;
  }
    /**
     * Orders lists element by element, treating each int as an unsigned
     * 32-bit value; when one list is a prefix of the other, the shorter one
     * sorts first.
     *
     * On a RuntimeException during the comparison, the state of both lists is
     * printed to System.out and the exception is rethrown.
     *
     * @throws ClassCastException if {@code other} is not a CEList
     */
    public int compareTo(Object other) {
        CEList that = (CEList)other;
        try {
            // Translates an index in this list's window to the matching index in that's.
            int shift = that.startOffset - startOffset;
            // End of the common prefix, expressed in this list's index space.
            int stop = Math.min(endOffset, that.endOffset - shift);
            for (int i = startOffset; i < stop; ++i) {
                int mine = contents[i];
                int theirs = that.contents[i + shift];
                if (mine == theirs) continue;
                long unsignedMine = mine & 0xFFFFFFFFL;
                long unsignedTheirs = theirs & 0xFFFFFFFFL;
                return (unsignedMine < unsignedTheirs) ? -1 : 1;
            }
            if (count == that.count) return 0;
            return (count < that.count) ? -1 : 1;
        } catch (RuntimeException e) {
            System.out.println("This: " + this + ", that: " + other);
            System.out.println(startOffset + ", " + endOffset
                + ", " + count + ", " + contents.length);
            System.out.println(that.startOffset + ", " + that.endOffset
                + ", " + that.count + ", " + that.contents.length);
            throw e;
        }
    }
    /**
     * Maps a tertiary value according to a decomposition type.
     *
     * For any type other than CANONICAL, {@code t} is first replaced by 0xE
     * when {@code ch} is in 3041..3094 (hiragana) or 0x11 when it is in
     * 30A1..30FA (katakana). The decomposition type then selects the final
     * value; types not listed in the switch leave {@code t} as it is.
     *
     * @param ch the code point being mapped
     * @param type decomposition type constant (CANONICAL, COMPATIBILITY, COMPAT_*)
     * @param t incoming tertiary value
     * @return the remapped value, narrowed to a byte
     */
    public static byte remap(int ch, byte type, int t) {
        if (type != CANONICAL) {
            boolean hiragana = 0x3041 <= ch && ch <= 0x3094;
            boolean katakana = 0x30A1 <= ch && ch <= 0x30FA;
            if (hiragana) {
                t = 0xE;
            } else if (katakana) {
                t = 0x11;
            }
        }
        switch (type) {
            case COMPATIBILITY:
                t = (t == 8) ? 0xA : 4;
                break;
            case COMPAT_FONT:
                t = (t == 8) ? 0xB : 5;
                break;
            case COMPAT_NOBREAK:
                t = 0x1B;
                break;
            case COMPAT_INITIAL:
                t = 0x17;
                break;
            case COMPAT_MEDIAL:
                t = 0x18;
                break;
            case COMPAT_FINAL:
                t = 0x19;
                break;
            case COMPAT_ISOLATED:
                t = 0x1A;
                break;
            case COMPAT_CIRCLE:
                if (t == 0x11) {
                    t = 0x13;
                } else if (t == 8) {
                    t = 0xC;
                } else {
                    t = 6;
                }
                break;
            case COMPAT_SUPER:
                t = 0x14;
                break;
            case COMPAT_SUB:
                t = 0x15;
                break;
            case COMPAT_VERTICAL:
                t = 0x16;
                break;
            case COMPAT_WIDE:
                t = (t == 8) ? 9 : 3;
                break;
            case COMPAT_NARROW:
                if (0xFF67 <= ch && ch <= 0xFF6F) {
                    t = 0x10;
                } else {
                    t = 0x12;
                }
                break;
            case COMPAT_SMALL:
                t = (t == 0xE) ? 0xE : 0xF;
                break;
            case COMPAT_SQUARE:
                t = (t == 8) ? 0x1D : 0x1C;
                break;
            case COMPAT_FRACTION:
                t = 0x1E;
                break;
        }
        return (byte)t;
    }
    /**
     * Returns the elements of this list formatted with toString(int) and
     * separated by single spaces; an empty list is shown as toString(0).
     */
    public String toString() {
        if (startOffset >= endOffset) return toString(0);
        StringBuffer text = new StringBuffer();
        String separator = "";
        for (int i = startOffset; i < endOffset; ++i) {
            text.append(separator).append(toString(contents[i]));
            separator = " ";
        }
        return text.toString();
    }
    /**
     * Formats the first {@code len} entries of {@code ces} with toString(int),
     * separated by single spaces; a non-positive {@code len} yields toString(0).
     */
    public static String toString(int[] ces, int len) {
        if (len <= 0) return toString(0);
        StringBuffer text = new StringBuffer();
        String separator = "";
        for (int i = 0; i < len; ++i) {
            text.append(separator).append(toString(ces[i]));
            separator = " ";
        }
        return text.toString();
    }
    /**
     * Formats every entry of {@code ces} with toString(int), separated by
     * single spaces; an empty stack yields toString(0).
     */
    public static String toString(IntStack ces) {
        if (ces.length() <= 0) return toString(0);
        StringBuffer text = new StringBuffer();
        String separator = "";
        for (int i = 0; i < ces.length(); ++i) {
            text.append(separator).append(toString(ces.get(i)));
            separator = " ";
        }
        return text.toString();
    }
    /**
     * Formats one collation element as "[primary.secondary.tertiary]", each
     * weight rendered with Utility.hex.
     */
    public static String toString(int ce) {
        StringBuffer text = new StringBuffer();
        text.append("[").append(Utility.hex(UCA.getPrimary(ce)));
        text.append(".").append(Utility.hex(UCA.getSecondary(ce)));
        text.append(".").append(Utility.hex(UCA.getTertiary(ce)));
        text.append("]");
        // Optional suffix, currently disabled: "(" + NAME3[UCA.getTertiary(ce)] + ")"
        return text.toString();
    }
    // Short labels for tertiary values, indexed by the value itself (0x00..0x1F).
    // The positions line up with the values produced by remap(), e.g. 0x1B is
    // "NOBREAK" and 0x1E is "FRACTION". Only referenced from the commented-out
    // suffix in toString(int).
    static final String[] NAME3 = {
        "IGNORE",    // 0
        "BLK",     // Unused?
        "MIN",
        "WIDE",
        "COMPAT",
        "FONT",
        "CIRCLE",
        "RES-2",
        "CAP",
        "WIDECAP",
        "COMPATCAP",
        "FONTCAP",
        "CIRCLECAP",
        "HIRA-SMALL",
        "HIRA",
        "SMALL",
        "SMALL-NARROW",
        "KATA",
        "NARROW",
        "CIRCLE-KATA",
        "SUP-MNN",
        "SUB-MNS",
        "VERT", // Missing??
        "AINI",
        "AMED",
        "AFIN",
        "AISO",
        "NOBREAK", // Missing?
        "SQUARED",
        "SQUAREDCAP",
        "FRACTION",
        "MAX"
    };
    // testing
    /**
     * Ad-hoc checks of compareTo, the view factories, equals and hashCode;
     * results are printed to System.out.
     */
    public static void main(String args[]) throws Exception {
        /* This: [0241.0020.0004], that: [0F6B.0020.0002]
            1, 2, 1, 2
            0, 1, 1, 1
        */
        // A one-element view at offset 1 compared with a one-element list at offset 0.
        CEList offsetView = new CEList(new int[] {0, 0x02412004}).sub(1,2);
        CEList single = new CEList(new int[] {0x0F6B2002});
        System.out.println(offsetView.compareTo(single));

        // Non-empty versus empty.
        CEList full = new CEList(new int[] {0, 1, 2, 3, 4});
        CEList empty = new CEList(new int[] {});
        int order = full.compareTo(empty);
        System.out.println(order);

        // Formatting of a list and of its start/end views.
        System.out.println(full);
        System.out.println(full.start(2));
        System.out.println(full.end(1));

        // A copied list versus a view holding the same elements.
        CEList copied = new CEList(new int[] {2, 3});
        CEList window = full.sub(2,4);
        System.out.println(copied.equals(window));
        System.out.println(copied.compareTo(window));
        System.out.println(copied.compareTo(full));
        System.out.println(copied.hashCode() == window.hashCode());
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCA/Case.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Case.java
@ -1,826 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Case.java,v $ 
 * $Date: 2001/08/31 00:20:40 $ 
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCA;
 public final class Case {
    // Scratch buffer shared by all fold(String) calls; fold(String) synchronizes
    // on it and clears it before each use.
    static StringBuffer out = new StringBuffer();
    /** Case-folds a single UTF-16 code unit; see fold(String). */
    static String fold(char c) {
        String single = String.valueOf(c);
        return fold(single);
    }
    /**
     * Case-folds a string one UTF-16 code unit at a time: each unit with an
     * entry in CF is replaced by that entry (which may be several characters);
     * units without an entry are copied through unchanged.
     */
    static String fold(String in) {
        synchronized (out) {
            out.setLength(0);
            final int length = in.length();
            for (int index = 0; index < length; index++) {
                char unit = in.charAt(index);
                String mapped = CF[unit];
                if (mapped != null) {
                    out.append(mapped);
                } else {
                    out.append(unit);
                }
            }
            return out.toString();
        }
    }
    // Case-folding table indexed by UTF-16 code unit; null means the unit folds
    // to itself. Filled in by the static initializer that follows.
    static String[] CF = new String[65536];
    static {
 	 CF[0x0041]="\u0061";
 	 CF[0x0042]="\u0062";
 	 CF[0x0043]="\u0063";
 	 CF[0x0044]="\u0064";
 	 CF[0x0045]="\u0065";
 	 CF[0x0046]="\u0066";
 	 CF[0x0047]="\u0067";
 	 CF[0x0048]="\u0068";
 	 CF[0x0049]="\u0069";
 	 CF[0x004A]="\u006A";
 	 CF[0x004B]="\u006B";
 	 CF[0x004C]="\u006C";
 	 CF[0x004D]="\u006D";
 	 CF[0x004E]="\u006E";
 	 CF[0x004F]="\u006F";
 	 CF[0x0050]="\u0070";
 	 CF[0x0051]="\u0071";
 	 CF[0x0052]="\u0072";
 	 CF[0x0053]="\u0073";
 	 CF[0x0054]="\u0074";
 	 CF[0x0055]="\u0075";
 	 CF[0x0056]="\u0076";
 	 CF[0x0057]="\u0077";
 	 CF[0x0058]="\u0078";
 	 CF[0x0059]="\u0079";
 	 CF[0x005A]="\u007A";
 	 CF[0x00B5]="\u03BC";
 	 CF[0x00C0]="\u00E0";
 	 CF[0x00C1]="\u00E1";
 	 CF[0x00C2]="\u00E2";
 	 CF[0x00C3]="\u00E3";
 	 CF[0x00C4]="\u00E4";
 	 CF[0x00C5]="\u00E5";
 	 CF[0x00C6]="\u00E6";
 	 CF[0x00C7]="\u00E7";
 	 CF[0x00C8]="\u00E8";
 	 CF[0x00C9]="\u00E9";
 	 CF[0x00CA]="\u00EA";
 	 CF[0x00CB]="\u00EB";
 	 CF[0x00CC]="\u00EC";
 	 CF[0x00CD]="\u00ED";
 	 CF[0x00CE]="\u00EE";
 	 CF[0x00CF]="\u00EF";
 	 CF[0x00D0]="\u00F0";
 	 CF[0x00D1]="\u00F1";
 	 CF[0x00D2]="\u00F2";
 	 CF[0x00D3]="\u00F3";
 	 CF[0x00D4]="\u00F4";
 	 CF[0x00D5]="\u00F5";
 	 CF[0x00D6]="\u00F6";
 	 CF[0x00D8]="\u00F8";
 	 CF[0x00D9]="\u00F9";
 	 CF[0x00DA]="\u00FA";
 	 CF[0x00DB]="\u00FB";
 	 CF[0x00DC]="\u00FC";
 	 CF[0x00DD]="\u00FD";
 	 CF[0x00DE]="\u00FE";
 	 CF[0x00DF]="\u0073\u0073";
 	 CF[0x0100]="\u0101";
 	 CF[0x0102]="\u0103";
 	 CF[0x0104]="\u0105";
 	 CF[0x0106]="\u0107";
 	 CF[0x0108]="\u0109";
 	 CF[0x010A]="\u010B";
 	 CF[0x010C]="\u010D";
 	 CF[0x010E]="\u010F";
 	 CF[0x0110]="\u0111";
 	 CF[0x0112]="\u0113";
 	 CF[0x0114]="\u0115";
 	 CF[0x0116]="\u0117";
 	 CF[0x0118]="\u0119";
 	 CF[0x011A]="\u011B";
 	 CF[0x011C]="\u011D";
 	 CF[0x011E]="\u011F";
 	 CF[0x0120]="\u0121";
 	 CF[0x0122]="\u0123";
 	 CF[0x0124]="\u0125";
 	 CF[0x0126]="\u0127";
 	 CF[0x0128]="\u0129";
 	 CF[0x012A]="\u012B";
 	 CF[0x012C]="\u012D";
 	 CF[0x012E]="\u012F";
 	 CF[0x0130]="\u0069";
 	 CF[0x0131]="\u0069";
 	 CF[0x0132]="\u0133";
 	 CF[0x0134]="\u0135";
 	 CF[0x0136]="\u0137";
 	 CF[0x0139]="\u013A";
 	 CF[0x013B]="\u013C";
 	 CF[0x013D]="\u013E";
 	 CF[0x013F]="\u0140";
 	 CF[0x0141]="\u0142";
 	 CF[0x0143]="\u0144";
 	 CF[0x0145]="\u0146";
 	 CF[0x0147]="\u0148";
 	 CF[0x0149]="\u02BC\u006E";
 	 CF[0x014A]="\u014B";
 	 CF[0x014C]="\u014D";
 	 CF[0x014E]="\u014F";
 	 CF[0x0150]="\u0151";
 	 CF[0x0152]="\u0153";
 	 CF[0x0154]="\u0155";
 	 CF[0x0156]="\u0157";
 	 CF[0x0158]="\u0159";
 	 CF[0x015A]="\u015B";
 	 CF[0x015C]="\u015D";
 	 CF[0x015E]="\u015F";
 	 CF[0x0160]="\u0161";
 	 CF[0x0162]="\u0163";
 	 CF[0x0164]="\u0165";
 	 CF[0x0166]="\u0167";
 	 CF[0x0168]="\u0169";
 	 CF[0x016A]="\u016B";
 	 CF[0x016C]="\u016D";
 	 CF[0x016E]="\u016F";
 	 CF[0x0170]="\u0171";
 	 CF[0x0172]="\u0173";
 	 CF[0x0174]="\u0175";
 	 CF[0x0176]="\u0177";
 	 CF[0x0178]="\u00FF";
 	 CF[0x0179]="\u017A";
 	 CF[0x017B]="\u017C";
 	 CF[0x017D]="\u017E";
 	 CF[0x017F]="\u0073";
 	 CF[0x0181]="\u0253";
 	 CF[0x0182]="\u0183";
 	 CF[0x0184]="\u0185";
 	 CF[0x0186]="\u0254";
 	 CF[0x0187]="\u0188";
 	 CF[0x0189]="\u0256";
 	 CF[0x018A]="\u0257";
 	 CF[0x018B]="\u018C";
 	 CF[0x018E]="\u01DD";
 	 CF[0x018F]="\u0259";
 	 CF[0x0190]="\u025B";
 	 CF[0x0191]="\u0192";
 	 CF[0x0193]="\u0260";
 	 CF[0x0194]="\u0263";
 	 CF[0x0196]="\u0269";
 	 CF[0x0197]="\u0268";
 	 CF[0x0198]="\u0199";
 	 CF[0x019C]="\u026F";
 	 CF[0x019D]="\u0272";
 	 CF[0x019F]="\u0275";
 	 CF[0x01A0]="\u01A1";
 	 CF[0x01A2]="\u01A3";
 	 CF[0x01A4]="\u01A5";
 	 CF[0x01A6]="\u0280";
 	 CF[0x01A7]="\u01A8";
 	 CF[0x01A9]="\u0283";
 	 CF[0x01AC]="\u01AD";
 	 CF[0x01AE]="\u0288";
 	 CF[0x01AF]="\u01B0";
 	 CF[0x01B1]="\u028A";
 	 CF[0x01B2]="\u028B";
 	 CF[0x01B3]="\u01B4";
 	 CF[0x01B5]="\u01B6";
 	 CF[0x01B7]="\u0292";
 	 CF[0x01B8]="\u01B9";
 	 CF[0x01BC]="\u01BD";
 	 CF[0x01C4]="\u01C6";
 	 CF[0x01C5]="\u01C6";
 	 CF[0x01C7]="\u01C9";
 	 CF[0x01C8]="\u01C9";
 	 CF[0x01CA]="\u01CC";
 	 CF[0x01CB]="\u01CC";
 	 CF[0x01CD]="\u01CE";
 	 CF[0x01CF]="\u01D0";
 	 CF[0x01D1]="\u01D2";
 	 CF[0x01D3]="\u01D4";
 	 CF[0x01D5]="\u01D6";
 	 CF[0x01D7]="\u01D8";
 	 CF[0x01D9]="\u01DA";
 	 CF[0x01DB]="\u01DC";
 	 CF[0x01DE]="\u01DF";
 	 CF[0x01E0]="\u01E1";
 	 CF[0x01E2]="\u01E3";
 	 CF[0x01E4]="\u01E5";
 	 CF[0x01E6]="\u01E7";
 	 CF[0x01E8]="\u01E9";
 	 CF[0x01EA]="\u01EB";
 	 CF[0x01EC]="\u01ED";
 	 CF[0x01EE]="\u01EF";
 	 CF[0x01F0]="\u006A\u030C";
 	 CF[0x01F1]="\u01F3";
 	 CF[0x01F2]="\u01F3";
 	 CF[0x01F4]="\u01F5";
 	 CF[0x01F6]="\u0195";
 	 CF[0x01F7]="\u01BF";
 	 CF[0x01F8]="\u01F9";
 	 CF[0x01FA]="\u01FB";
 	 CF[0x01FC]="\u01FD";
 	 CF[0x01FE]="\u01FF";
 	 CF[0x0200]="\u0201";
 	 CF[0x0202]="\u0203";
 	 CF[0x0204]="\u0205";
 	 CF[0x0206]="\u0207";
 	 CF[0x0208]="\u0209";
 	 CF[0x020A]="\u020B";
 	 CF[0x020C]="\u020D";
 	 CF[0x020E]="\u020F";
 	 CF[0x0210]="\u0211";
 	 CF[0x0212]="\u0213";
 	 CF[0x0214]="\u0215";
 	 CF[0x0216]="\u0217";
 	 CF[0x0218]="\u0219";
 	 CF[0x021A]="\u021B";
 	 CF[0x021C]="\u021D";
 	 CF[0x021E]="\u021F";
 	 CF[0x0222]="\u0223";
 	 CF[0x0224]="\u0225";
 	 CF[0x0226]="\u0227";
 	 CF[0x0228]="\u0229";
 	 CF[0x022A]="\u022B";
 	 CF[0x022C]="\u022D";
 	 CF[0x022E]="\u022F";
 	 CF[0x0230]="\u0231";
 	 CF[0x0232]="\u0233";
 	 CF[0x0345]="\u03B9";
 	 CF[0x0386]="\u03AC";
 	 CF[0x0388]="\u03AD";
 	 CF[0x0389]="\u03AE";
 	 CF[0x038A]="\u03AF";
 	 CF[0x038C]="\u03CC";
 	 CF[0x038E]="\u03CD";
 	 CF[0x038F]="\u03CE";
 	 CF[0x0390]="\u03B9\u0308\u0301";
 	 CF[0x0391]="\u03B1";
 	 CF[0x0392]="\u03B2";
 	 CF[0x0393]="\u03B3";
 	 CF[0x0394]="\u03B4";
 	 CF[0x0395]="\u03B5";
 	 CF[0x0396]="\u03B6";
 	 CF[0x0397]="\u03B7";
 	 CF[0x0398]="\u03B8";
 	 CF[0x0399]="\u03B9";
 	 CF[0x039A]="\u03BA";
 	 CF[0x039B]="\u03BB";
 	 CF[0x039C]="\u03BC";
 	 CF[0x039D]="\u03BD";
 	 CF[0x039E]="\u03BE";
 	 CF[0x039F]="\u03BF";
 	 CF[0x03A0]="\u03C0";
 	 CF[0x03A1]="\u03C1";
 	 CF[0x03A3]="\u03C2";
 	 CF[0x03A4]="\u03C4";
 	 CF[0x03A5]="\u03C5";
 	 CF[0x03A6]="\u03C6";
 	 CF[0x03A7]="\u03C7";
 	 CF[0x03A8]="\u03C8";
 	 CF[0x03A9]="\u03C9";
 	 CF[0x03AA]="\u03CA";
 	 CF[0x03AB]="\u03CB";
 	 CF[0x03B0]="\u03C5\u0308\u0301";
 	 CF[0x03C3]="\u03C2";
 	 CF[0x03D0]="\u03B2";
 	 CF[0x03D1]="\u03B8";
 	 CF[0x03D5]="\u03C6";
 	 CF[0x03D6]="\u03C0";
 	 CF[0x03DA]="\u03DB";
 	 CF[0x03DC]="\u03DD";
 	 CF[0x03DE]="\u03DF";
 	 CF[0x03E0]="\u03E1";
 	 CF[0x03E2]="\u03E3";
 	 CF[0x03E4]="\u03E5";
 	 CF[0x03E6]="\u03E7";
 	 CF[0x03E8]="\u03E9";
 	 CF[0x03EA]="\u03EB";
 	 CF[0x03EC]="\u03ED";
 	 CF[0x03EE]="\u03EF";
 	 CF[0x03F0]="\u03BA";
 	 CF[0x03F1]="\u03C1";
 	 CF[0x03F2]="\u03C2";
 	 CF[0x0400]="\u0450";
 	 CF[0x0401]="\u0451";
 	 CF[0x0402]="\u0452";
 	 CF[0x0403]="\u0453";
 	 CF[0x0404]="\u0454";
 	 CF[0x0405]="\u0455";
 	 CF[0x0406]="\u0456";
 	 CF[0x0407]="\u0457";
 	 CF[0x0408]="\u0458";
 	 CF[0x0409]="\u0459";
 	 CF[0x040A]="\u045A";
 	 CF[0x040B]="\u045B";
 	 CF[0x040C]="\u045C";
 	 CF[0x040D]="\u045D";
 	 CF[0x040E]="\u045E";
 	 CF[0x040F]="\u045F";
 	 CF[0x0410]="\u0430";
 	 CF[0x0411]="\u0431";
 	 CF[0x0412]="\u0432";
 	 CF[0x0413]="\u0433";
 	 CF[0x0414]="\u0434";
 	 CF[0x0415]="\u0435";
 	 CF[0x0416]="\u0436";
 	 CF[0x0417]="\u0437";
 	 CF[0x0418]="\u0438";
 	 CF[0x0419]="\u0439";
 	 CF[0x041A]="\u043A";
 	 CF[0x041B]="\u043B";
 	 CF[0x041C]="\u043C";
 	 CF[0x041D]="\u043D";
 	 CF[0x041E]="\u043E";
 	 CF[0x041F]="\u043F";
 	 CF[0x0420]="\u0440";
 	 CF[0x0421]="\u0441";
 	 CF[0x0422]="\u0442";
 	 CF[0x0423]="\u0443";
 	 CF[0x0424]="\u0444";
 	 CF[0x0425]="\u0445";
 	 CF[0x0426]="\u0446";
 	 CF[0x0427]="\u0447";
 	 CF[0x0428]="\u0448";
 	 CF[0x0429]="\u0449";
 	 CF[0x042A]="\u044A";
 	 CF[0x042B]="\u044B";
 	 CF[0x042C]="\u044C";
 	 CF[0x042D]="\u044D";
 	 CF[0x042E]="\u044E";
 	 CF[0x042F]="\u044F";
 	 CF[0x0460]="\u0461";
 	 CF[0x0462]="\u0463";
 	 CF[0x0464]="\u0465";
 	 CF[0x0466]="\u0467";
 	 CF[0x0468]="\u0469";
 	 CF[0x046A]="\u046B";
 	 CF[0x046C]="\u046D";
 	 CF[0x046E]="\u046F";
 	 CF[0x0470]="\u0471";
 	 CF[0x0472]="\u0473";
 	 CF[0x0474]="\u0475";
 	 CF[0x0476]="\u0477";
 	 CF[0x0478]="\u0479";
 	 CF[0x047A]="\u047B";
 	 CF[0x047C]="\u047D";
 	 CF[0x047E]="\u047F";
 	 CF[0x0480]="\u0481";
 	 CF[0x048C]="\u048D";
 	 CF[0x048E]="\u048F";
 	 CF[0x0490]="\u0491";
 	 CF[0x0492]="\u0493";
 	 CF[0x0494]="\u0495";
 	 CF[0x0496]="\u0497";
 	 CF[0x0498]="\u0499";
 	 CF[0x049A]="\u049B";
 	 CF[0x049C]="\u049D";
 	 CF[0x049E]="\u049F";
 	 CF[0x04A0]="\u04A1";
 	 CF[0x04A2]="\u04A3";
 	 CF[0x04A4]="\u04A5";
 	 CF[0x04A6]="\u04A7";
 	 CF[0x04A8]="\u04A9";
 	 CF[0x04AA]="\u04AB";
 	 CF[0x04AC]="\u04AD";
 	 CF[0x04AE]="\u04AF";
 	 CF[0x04B0]="\u04B1";
 	 CF[0x04B2]="\u04B3";
 	 CF[0x04B4]="\u04B5";
 	 CF[0x04B6]="\u04B7";
 	 CF[0x04B8]="\u04B9";
 	 CF[0x04BA]="\u04BB";
 	 CF[0x04BC]="\u04BD";
 	 CF[0x04BE]="\u04BF";
 	 CF[0x04C1]="\u04C2";
 	 CF[0x04C3]="\u04C4";
 	 CF[0x04C7]="\u04C8";
 	 CF[0x04CB]="\u04CC";
 	 CF[0x04D0]="\u04D1";
 	 CF[0x04D2]="\u04D3";
 	 CF[0x04D4]="\u04D5";
 	 CF[0x04D6]="\u04D7";
 	 CF[0x04D8]="\u04D9";
 	 CF[0x04DA]="\u04DB";
 	 CF[0x04DC]="\u04DD";
 	 CF[0x04DE]="\u04DF";
 	 CF[0x04E0]="\u04E1";
 	 CF[0x04E2]="\u04E3";
 	 CF[0x04E4]="\u04E5";
 	 CF[0x04E6]="\u04E7";
 	 CF[0x04E8]="\u04E9";
 	 CF[0x04EA]="\u04EB";
 	 CF[0x04EC]="\u04ED";
 	 CF[0x04EE]="\u04EF";
 	 CF[0x04F0]="\u04F1";
 	 CF[0x04F2]="\u04F3";
 	 CF[0x04F4]="\u04F5";
 	 CF[0x04F8]="\u04F9";
 	 CF[0x0531]="\u0561";
 	 CF[0x0532]="\u0562";
 	 CF[0x0533]="\u0563";
 	 CF[0x0534]="\u0564";
 	 CF[0x0535]="\u0565";
 	 CF[0x0536]="\u0566";
 	 CF[0x0537]="\u0567";
 	 CF[0x0538]="\u0568";
 	 CF[0x0539]="\u0569";
 	 CF[0x053A]="\u056A";
 	 CF[0x053B]="\u056B";
 	 CF[0x053C]="\u056C";
 	 CF[0x053D]="\u056D";
 	 CF[0x053E]="\u056E";
 	 CF[0x053F]="\u056F";
 	 CF[0x0540]="\u0570";
 	 CF[0x0541]="\u0571";
 	 CF[0x0542]="\u0572";
 	 CF[0x0543]="\u0573";
 	 CF[0x0544]="\u0574";
 	 CF[0x0545]="\u0575";
 	 CF[0x0546]="\u0576";
 	 CF[0x0547]="\u0577";
 	 CF[0x0548]="\u0578";
 	 CF[0x0549]="\u0579";
 	 CF[0x054A]="\u057A";
 	 CF[0x054B]="\u057B";
 	 CF[0x054C]="\u057C";
 	 CF[0x054D]="\u057D";
 	 CF[0x054E]="\u057E";
 	 CF[0x054F]="\u057F";
 	 CF[0x0550]="\u0580";
 	 CF[0x0551]="\u0581";
 	 CF[0x0552]="\u0582";
 	 CF[0x0553]="\u0583";
 	 CF[0x0554]="\u0584";
 	 CF[0x0555]="\u0585";
 	 CF[0x0556]="\u0586";
 	 CF[0x0587]="\u0565\u0582";
 	 CF[0x1E00]="\u1E01";
 	 CF[0x1E02]="\u1E03";
 	 CF[0x1E04]="\u1E05";
 	 CF[0x1E06]="\u1E07";
 	 CF[0x1E08]="\u1E09";
 	 CF[0x1E0A]="\u1E0B";
 	 CF[0x1E0C]="\u1E0D";
 	 CF[0x1E0E]="\u1E0F";
 	 CF[0x1E10]="\u1E11";
 	 CF[0x1E12]="\u1E13";
 	 CF[0x1E14]="\u1E15";
 	 CF[0x1E16]="\u1E17";
 	 CF[0x1E18]="\u1E19";
 	 CF[0x1E1A]="\u1E1B";
 	 CF[0x1E1C]="\u1E1D";
 	 CF[0x1E1E]="\u1E1F";
 	 CF[0x1E20]="\u1E21";
 	 CF[0x1E22]="\u1E23";
 	 CF[0x1E24]="\u1E25";
 	 CF[0x1E26]="\u1E27";
 	 CF[0x1E28]="\u1E29";
 	 CF[0x1E2A]="\u1E2B";
 	 CF[0x1E2C]="\u1E2D";
 	 CF[0x1E2E]="\u1E2F";
 	 CF[0x1E30]="\u1E31";
 	 CF[0x1E32]="\u1E33";
 	 CF[0x1E34]="\u1E35";
 	 CF[0x1E36]="\u1E37";
 	 CF[0x1E38]="\u1E39";
 	 CF[0x1E3A]="\u1E3B";
 	 CF[0x1E3C]="\u1E3D";
 	 CF[0x1E3E]="\u1E3F";
 	 CF[0x1E40]="\u1E41";
 	 CF[0x1E42]="\u1E43";
 	 CF[0x1E44]="\u1E45";
 	 CF[0x1E46]="\u1E47";
 	 CF[0x1E48]="\u1E49";
 	 CF[0x1E4A]="\u1E4B";
 	 CF[0x1E4C]="\u1E4D";
 	 CF[0x1E4E]="\u1E4F";
 	 CF[0x1E50]="\u1E51";
 	 CF[0x1E52]="\u1E53";
 	 CF[0x1E54]="\u1E55";
 	 CF[0x1E56]="\u1E57";
 	 CF[0x1E58]="\u1E59";
 	 CF[0x1E5A]="\u1E5B";
 	 CF[0x1E5C]="\u1E5D";
 	 CF[0x1E5E]="\u1E5F";
 	 CF[0x1E60]="\u1E61";
 	 CF[0x1E62]="\u1E63";
 	 CF[0x1E64]="\u1E65";
 	 CF[0x1E66]="\u1E67";
 	 CF[0x1E68]="\u1E69";
 	 CF[0x1E6A]="\u1E6B";
 	 CF[0x1E6C]="\u1E6D";
 	 CF[0x1E6E]="\u1E6F";
 	 CF[0x1E70]="\u1E71";
 	 CF[0x1E72]="\u1E73";
 	 CF[0x1E74]="\u1E75";
 	 CF[0x1E76]="\u1E77";
 	 CF[0x1E78]="\u1E79";
 	 CF[0x1E7A]="\u1E7B";
 	 CF[0x1E7C]="\u1E7D";
 	 CF[0x1E7E]="\u1E7F";
 	 CF[0x1E80]="\u1E81";
 	 CF[0x1E82]="\u1E83";
 	 CF[0x1E84]="\u1E85";
 	 CF[0x1E86]="\u1E87";
 	 CF[0x1E88]="\u1E89";
 	 CF[0x1E8A]="\u1E8B";
 	 CF[0x1E8C]="\u1E8D";
 	 CF[0x1E8E]="\u1E8F";
 	 CF[0x1E90]="\u1E91";
 	 CF[0x1E92]="\u1E93";
 	 CF[0x1E94]="\u1E95";
 	 CF[0x1E96]="\u0068\u0331";
 	 CF[0x1E97]="\u0074\u0308";
 	 CF[0x1E98]="\u0077\u030A";
 	 CF[0x1E99]="\u0079\u030A";
 	 CF[0x1E9A]="\u0061\u02BE";
 	 CF[0x1E9B]="\u1E61";
 	 CF[0x1EA0]="\u1EA1";
 	 CF[0x1EA2]="\u1EA3";
 	 CF[0x1EA4]="\u1EA5";
 	 CF[0x1EA6]="\u1EA7";
 	 CF[0x1EA8]="\u1EA9";
 	 CF[0x1EAA]="\u1EAB";
 	 CF[0x1EAC]="\u1EAD";
 	 CF[0x1EAE]="\u1EAF";
 	 CF[0x1EB0]="\u1EB1";
 	 CF[0x1EB2]="\u1EB3";
 	 CF[0x1EB4]="\u1EB5";
 	 CF[0x1EB6]="\u1EB7";
 	 CF[0x1EB8]="\u1EB9";
 	 CF[0x1EBA]="\u1EBB";
 	 CF[0x1EBC]="\u1EBD";
 	 CF[0x1EBE]="\u1EBF";
 	 CF[0x1EC0]="\u1EC1";
 	 CF[0x1EC2]="\u1EC3";
 	 CF[0x1EC4]="\u1EC5";
 	 CF[0x1EC6]="\u1EC7";
 	 CF[0x1EC8]="\u1EC9";
 	 CF[0x1ECA]="\u1ECB";
 	 CF[0x1ECC]="\u1ECD";
 	 CF[0x1ECE]="\u1ECF";
 	 CF[0x1ED0]="\u1ED1";
 	 CF[0x1ED2]="\u1ED3";
 	 CF[0x1ED4]="\u1ED5";
 	 CF[0x1ED6]="\u1ED7";
 	 CF[0x1ED8]="\u1ED9";
 	 CF[0x1EDA]="\u1EDB";
 	 CF[0x1EDC]="\u1EDD";
 	 CF[0x1EDE]="\u1EDF";
 	 CF[0x1EE0]="\u1EE1";
 	 CF[0x1EE2]="\u1EE3";
 	 CF[0x1EE4]="\u1EE5";
 	 CF[0x1EE6]="\u1EE7";
 	 CF[0x1EE8]="\u1EE9";
 	 CF[0x1EEA]="\u1EEB";
 	 CF[0x1EEC]="\u1EED";
 	 CF[0x1EEE]="\u1EEF";
 	 CF[0x1EF0]="\u1EF1";
 	 CF[0x1EF2]="\u1EF3";
 	 CF[0x1EF4]="\u1EF5";
 	 CF[0x1EF6]="\u1EF7";
 	 CF[0x1EF8]="\u1EF9";
 	 CF[0x1F08]="\u1F00";
 	 CF[0x1F09]="\u1F01";
 	 CF[0x1F0A]="\u1F02";
 	 CF[0x1F0B]="\u1F03";
 	 CF[0x1F0C]="\u1F04";
 	 CF[0x1F0D]="\u1F05";
 	 CF[0x1F0E]="\u1F06";
 	 CF[0x1F0F]="\u1F07";
 	 CF[0x1F18]="\u1F10";
 	 CF[0x1F19]="\u1F11";
 	 CF[0x1F1A]="\u1F12";
 	 CF[0x1F1B]="\u1F13";
 	 CF[0x1F1C]="\u1F14";
 	 CF[0x1F1D]="\u1F15";
 	 CF[0x1F28]="\u1F20";
 	 CF[0x1F29]="\u1F21";
 	 CF[0x1F2A]="\u1F22";
 	 CF[0x1F2B]="\u1F23";
 	 CF[0x1F2C]="\u1F24";
 	 CF[0x1F2D]="\u1F25";
 	 CF[0x1F2E]="\u1F26";
 	 CF[0x1F2F]="\u1F27";
 	 CF[0x1F38]="\u1F30";
 	 CF[0x1F39]="\u1F31";
 	 CF[0x1F3A]="\u1F32";
 	 CF[0x1F3B]="\u1F33";
 	 CF[0x1F3C]="\u1F34";
 	 CF[0x1F3D]="\u1F35";
 	 CF[0x1F3E]="\u1F36";
 	 CF[0x1F3F]="\u1F37";
 	 CF[0x1F48]="\u1F40";
 	 CF[0x1F49]="\u1F41";
 	 CF[0x1F4A]="\u1F42";
 	 CF[0x1F4B]="\u1F43";
 	 CF[0x1F4C]="\u1F44";
 	 CF[0x1F4D]="\u1F45";
 	 CF[0x1F50]="\u03C5\u0313";
 	 CF[0x1F52]="\u03C5\u0313\u0300";
 	 CF[0x1F54]="\u03C5\u0313\u0301";
 	 CF[0x1F56]="\u03C5\u0313\u0342";
 	 CF[0x1F59]="\u1F51";
 	 CF[0x1F5B]="\u1F53";
 	 CF[0x1F5D]="\u1F55";
 	 CF[0x1F5F]="\u1F57";
 	 CF[0x1F68]="\u1F60";
 	 CF[0x1F69]="\u1F61";
 	 CF[0x1F6A]="\u1F62";
 	 CF[0x1F6B]="\u1F63";
 	 CF[0x1F6C]="\u1F64";
 	 CF[0x1F6D]="\u1F65";
 	 CF[0x1F6E]="\u1F66";
 	 CF[0x1F6F]="\u1F67";
 	 CF[0x1F80]="\u1F00\u03B9";
 	 CF[0x1F81]="\u1F01\u03B9";
 	 CF[0x1F82]="\u1F02\u03B9";
 	 CF[0x1F83]="\u1F03\u03B9";
 	 CF[0x1F84]="\u1F04\u03B9";
 	 CF[0x1F85]="\u1F05\u03B9";
 	 CF[0x1F86]="\u1F06\u03B9";
 	 CF[0x1F87]="\u1F07\u03B9";
 	 CF[0x1F88]="\u1F00\u03B9";
 	 CF[0x1F89]="\u1F01\u03B9";
 	 CF[0x1F8A]="\u1F02\u03B9";
 	 CF[0x1F8B]="\u1F03\u03B9";
 	 CF[0x1F8C]="\u1F04\u03B9";
 	 CF[0x1F8D]="\u1F05\u03B9";
 	 CF[0x1F8E]="\u1F06\u03B9";
 	 CF[0x1F8F]="\u1F07\u03B9";
 	 CF[0x1F90]="\u1F20\u03B9";
 	 CF[0x1F91]="\u1F21\u03B9";
 	 CF[0x1F92]="\u1F22\u03B9";
 	 CF[0x1F93]="\u1F23\u03B9";
 	 CF[0x1F94]="\u1F24\u03B9";
 	 CF[0x1F95]="\u1F25\u03B9";
 	 CF[0x1F96]="\u1F26\u03B9";
 	 CF[0x1F97]="\u1F27\u03B9";
 	 CF[0x1F98]="\u1F20\u03B9";
 	 CF[0x1F99]="\u1F21\u03B9";
 	 CF[0x1F9A]="\u1F22\u03B9";
 	 CF[0x1F9B]="\u1F23\u03B9";
 	 CF[0x1F9C]="\u1F24\u03B9";
 	 CF[0x1F9D]="\u1F25\u03B9";
 	 CF[0x1F9E]="\u1F26\u03B9";
 	 CF[0x1F9F]="\u1F27\u03B9";
 	 CF[0x1FA0]="\u1F60\u03B9";
 	 CF[0x1FA1]="\u1F61\u03B9";
 	 CF[0x1FA2]="\u1F62\u03B9";
 	 CF[0x1FA3]="\u1F63\u03B9";
 	 CF[0x1FA4]="\u1F64\u03B9";
 	 CF[0x1FA5]="\u1F65\u03B9";
 	 CF[0x1FA6]="\u1F66\u03B9";
 	 CF[0x1FA7]="\u1F67\u03B9";
 	 CF[0x1FA8]="\u1F60\u03B9";
 	 CF[0x1FA9]="\u1F61\u03B9";
 	 CF[0x1FAA]="\u1F62\u03B9";
 	 CF[0x1FAB]="\u1F63\u03B9";
 	 CF[0x1FAC]="\u1F64\u03B9";
 	 CF[0x1FAD]="\u1F65\u03B9";
 	 CF[0x1FAE]="\u1F66\u03B9";
 	 CF[0x1FAF]="\u1F67\u03B9";
 	 CF[0x1FB2]="\u1F70\u03B9";
 	 CF[0x1FB3]="\u03B1\u03B9";
 	 CF[0x1FB4]="\u03AC\u03B9";
 	 CF[0x1FB6]="\u03B1\u0342";
 	 CF[0x1FB7]="\u03B1\u0342\u03B9";
 	 CF[0x1FB8]="\u1FB0";
 	 CF[0x1FB9]="\u1FB1";
 	 CF[0x1FBA]="\u1F70";
 	 CF[0x1FBB]="\u1F71";
 	 CF[0x1FBC]="\u03B1\u03B9";
 	 CF[0x1FBE]="\u03B9";
 	 CF[0x1FC2]="\u1F74\u03B9";
 	 CF[0x1FC3]="\u03B7\u03B9";
 	 CF[0x1FC4]="\u03AE\u03B9";
 	 CF[0x1FC6]="\u03B7\u0342";
 	 CF[0x1FC7]="\u03B7\u0342\u03B9";
 	 CF[0x1FC8]="\u1F72";
 	 CF[0x1FC9]="\u1F73";
 	 CF[0x1FCA]="\u1F74";
 	 CF[0x1FCB]="\u1F75";
 	 CF[0x1FCC]="\u03B7\u03B9";
 	 CF[0x1FD2]="\u03B9\u0308\u0300";
 	 CF[0x1FD3]="\u03B9\u0308\u0301";
 	 CF[0x1FD6]="\u03B9\u0342";
 	 CF[0x1FD7]="\u03B9\u0308\u0342";
 	 CF[0x1FD8]="\u1FD0";
 	 CF[0x1FD9]="\u1FD1";
 	 CF[0x1FDA]="\u1F76";
 	 CF[0x1FDB]="\u1F77";
 	 CF[0x1FE2]="\u03C5\u0308\u0300";
 	 CF[0x1FE3]="\u03C5\u0308\u0301";
 	 CF[0x1FE4]="\u03C1\u0313";
 	 CF[0x1FE6]="\u03C5\u0342";
 	 CF[0x1FE7]="\u03C5\u0308\u0342";
 	 CF[0x1FE8]="\u1FE0";
 	 CF[0x1FE9]="\u1FE1";
 	 CF[0x1FEA]="\u1F7A";
 	 CF[0x1FEB]="\u1F7B";
 	 CF[0x1FEC]="\u1FE5";
 	 CF[0x1FF2]="\u1F7C\u03B9";
 	 CF[0x1FF3]="\u03C9\u03B9";
 	 CF[0x1FF4]="\u03CE\u03B9";
 	 CF[0x1FF6]="\u03C9\u0342";
 	 CF[0x1FF7]="\u03C9\u0342\u03B9";
 	 CF[0x1FF8]="\u1F78";
 	 CF[0x1FF9]="\u1F79";
 	 CF[0x1FFA]="\u1F7C";
 	 CF[0x1FFB]="\u1F7D";
 	 CF[0x1FFC]="\u03C9\u03B9";
 	 CF[0x2126]="\u03C9";
 	 CF[0x212A]="\u006B";
 	 CF[0x212B]="\u00E5";
 	 CF[0x2160]="\u2170";
 	 CF[0x2161]="\u2171";
 	 CF[0x2162]="\u2172";
 	 CF[0x2163]="\u2173";
 	 CF[0x2164]="\u2174";
 	 CF[0x2165]="\u2175";
 	 CF[0x2166]="\u2176";
 	 CF[0x2167]="\u2177";
 	 CF[0x2168]="\u2178";
 	 CF[0x2169]="\u2179";
 	 CF[0x216A]="\u217A";
 	 CF[0x216B]="\u217B";
 	 CF[0x216C]="\u217C";
 	 CF[0x216D]="\u217D";
 	 CF[0x216E]="\u217E";
 	 CF[0x216F]="\u217F";
 	 CF[0x24B6]="\u24D0";
 	 CF[0x24B7]="\u24D1";
 	 CF[0x24B8]="\u24D2";
 	 CF[0x24B9]="\u24D3";
 	 CF[0x24BA]="\u24D4";
 	 CF[0x24BB]="\u24D5";
 	 CF[0x24BC]="\u24D6";
 	 CF[0x24BD]="\u24D7";
 	 CF[0x24BE]="\u24D8";
 	 CF[0x24BF]="\u24D9";
 	 CF[0x24C0]="\u24DA";
 	 CF[0x24C1]="\u24DB";
 	 CF[0x24C2]="\u24DC";
 	 CF[0x24C3]="\u24DD";
 	 CF[0x24C4]="\u24DE";
 	 CF[0x24C5]="\u24DF";
 	 CF[0x24C6]="\u24E0";
 	 CF[0x24C7]="\u24E1";
 	 CF[0x24C8]="\u24E2";
 	 CF[0x24C9]="\u24E3";
 	 CF[0x24CA]="\u24E4";
 	 CF[0x24CB]="\u24E5";
 	 CF[0x24CC]="\u24E6";
 	 CF[0x24CD]="\u24E7";
 	 CF[0x24CE]="\u24E8";
 	 CF[0x24CF]="\u24E9";
 	 CF[0xFB00]="\u0066\u0066";
 	 CF[0xFB01]="\u0066\u0069";
 	 CF[0xFB02]="\u0066\u006C";
 	 CF[0xFB03]="\u0066\u0066\u0069";
 	 CF[0xFB04]="\u0066\u0066\u006C";
 	 CF[0xFB05]="\u0073\u0074";
 	 CF[0xFB06]="\u0073\u0074";
 	 CF[0xFB13]="\u0574\u0576";
 	 CF[0xFB14]="\u0574\u0565";
 	 CF[0xFB15]="\u0574\u056B";
 	 CF[0xFB16]="\u057E\u0576";
 	 CF[0xFB17]="\u0574\u056D";
 	 CF[0xFF21]="\uFF41";
 	 CF[0xFF22]="\uFF42";
 	 CF[0xFF23]="\uFF43";
 	 CF[0xFF24]="\uFF44";
 	 CF[0xFF25]="\uFF45";
 	 CF[0xFF26]="\uFF46";
 	 CF[0xFF27]="\uFF47";
 	 CF[0xFF28]="\uFF48";
 	 CF[0xFF29]="\uFF49";
 	 CF[0xFF2A]="\uFF4A";
 	 CF[0xFF2B]="\uFF4B";
 	 CF[0xFF2C]="\uFF4C";
 	 CF[0xFF2D]="\uFF4D";
 	 CF[0xFF2E]="\uFF4E";
 	 CF[0xFF2F]="\uFF4F";
 	 CF[0xFF30]="\uFF50";
 	 CF[0xFF31]="\uFF51";
 	 CF[0xFF32]="\uFF52";
 	 CF[0xFF33]="\uFF53";
 	 CF[0xFF34]="\uFF54";
 	 CF[0xFF35]="\uFF55";
 	 CF[0xFF36]="\uFF56";
 	 CF[0xFF37]="\uFF57";
 	 CF[0xFF38]="\uFF58";
 	 CF[0xFF39]="\uFF59";
 	 CF[0xFF3A]="\uFF5A";
 // 785 case foldings total
 }
 }
--- a/tools/unicodetools/com/ibm/text/UCA/CheckCollationValidity.html
+++ b/tools/unicodetools/com/ibm/text/UCA/CheckCollationValidity.html
@ -1,369 +0,0 @@
 <html><body>
 <h1
 >1. Mismatches when NFD is OFF</h1><h2
 >Date:Mon Jun 03 08:45:38 PDT 2002</h2><h2
 >File Version:-3.1.1d1</h2><p
 >Alternate Handling = NON_IGNORABLE</p><table border="1"
 ><caption
  >Mismatches in UCA-NOD: Plain vs NFC: 4</caption><tr
  ><th
   >Code</th><th
   >Type</th><th
   >CC?</th><th
   >Key</th></tr><tr
  ><th rowSpan="2" align="right"
   >F951 CJK COMPATIBILITY IDEOGRAPH-F951<br
    ></br>NFC=964B</th><th
   >Plain</th><th
   >n</th><td
   >[FF41 96FB | 0020 0020 | 0002 0002]</td></tr><tr
  ><th
   >NFC</th><th
   >ERROR</th><td
   >[FF41 964B | 0020 0020 | 0002 0002]</td></tr><tr
  ><th rowSpan="2" align="right"
   >FB1F HEBREW LIGATURE YIDDISH YOD YOD PATAH<br
    ></br>NFC=05F2 05B7</th><th
   >Plain</th><th
   >n</th><td
   >[0EC0 0EC0 | 0020 0020 00B2 | 0004 0004 001F]</td></tr><tr
  ><th
   >NFC</th><th
   >Y</th><td
   >[0EC0 0EC0 | 0020 0020 00B2 | 0004 0004 0002]</td></tr><tr
  ><th rowSpan="2" align="right"
   >FB3A HEBREW LETTER FINAL KAF WITH DAGESH<br
    ></br>NFC=05DA 05BC</th><th
   >Plain</th><th
   >n</th><td
   >[0EC1 | 0020 00B6 | 0019 0019]</td></tr><tr
  ><th
   >NFC</th><th
   >Y</th><td
   >[0EC1 | 0020 00B6 | 0019 0002]</td></tr><tr
  ><th rowSpan="2" align="right"
   >FB43 HEBREW LETTER FINAL PE WITH DAGESH<br
    ></br>NFC=05E3 05BC</th><th
   >Plain</th><th
   >n</th><td
   >[0EC7 | 0020 00B6 | 0019 0019]</td></tr><tr
  ><th
   >NFC</th><th
   >Y</th><td
   >[0EC7 | 0020 00B6 | 0019 0002]</td></tr></table><br>
 <h1>2. Differences in Ordering</h1>
 <p>Codes and names are in the white rows: bold means that the NO-NFD sort key differs from UCA key.</p>
 <p>Keys are in the light blue rows: green is the bad key, blue is UCA, black is where they equal.</p>
 <table border='1'>
 <tr><th>File Order</th><th>Code and Decomp</th><th>Key and Decomp-Key</th></tr>
 <tr><td colspan='3'></td></tr>
 <tr><td>12573</td><td>F951 CJK COMPATIBILITY IDEOGRAPH-F951<br>&lt;964B&gt; </td><td>
 <font color='#009900'>[FF41 96FB | 0020 0020 | 0002 0002 | |]</font><br><font color='#000099'>[FF41 964B | 0020 0020 | 0002 0002 | |]</font>
 </td></tr>
 <tr><td>12574</td><td>FA09 CJK COMPATIBILITY IDEOGRAPH-FA09<br>&lt;964D&gt; </td><td>
 [FF41 964D | 0020 0020 | 0002 0002 | |]
 </td></tr>
 </table>
 <h2>3. Primaries Incompatible with Decompositions</h2><table border='1'>
 <tr><th>Code</th><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>
 <tr><td>00A8</td><td>[0214]</td><td>[0209]</td><td>DIAERESIS</td></tr>
 <tr><td>00AF</td><td>[0210]</td><td>[0209]</td><td>MACRON</td></tr>
 <tr><td>00B4</td><td>[020D]</td><td>[0209]</td><td>ACUTE ACCENT</td></tr>
 <tr><td>00B8</td><td>[0219]</td><td>[0209]</td><td>CEDILLA</td></tr>
 <tr><td>02D8</td><td>[0212]</td><td>[0209]</td><td>BREVE</td></tr>
 <tr><td>02D9</td><td>[0213]</td><td>[0209]</td><td>DOT ABOVE</td></tr>
 <tr><td>02DA</td><td>[0215]</td><td>[0209]</td><td>RING ABOVE</td></tr>
 <tr><td>02DB</td><td>[021A]</td><td>[0209]</td><td>OGONEK</td></tr>
 <tr><td>02DC</td><td>[020E]</td><td>[0209]</td><td>SMALL TILDE</td></tr>
 <tr><td>02DD</td><td>[0216]</td><td>[0209]</td><td>DOUBLE ACUTE ACCENT</td></tr>
 <tr><td>037A</td><td>[0C9B]</td><td>[0209]</td><td>GREEK YPOGEGRAMMENI</td></tr>
 <tr><td>0384</td><td>[020D]</td><td>[0209]</td><td>GREEK TONOS</td></tr>
 <tr><td>0385</td><td>[0214]</td><td>[0209]</td><td>GREEK DIALYTIKA TONOS</td></tr>
 <tr><td>0CCB</td><td>[12C4]</td><td>[12C3 12C7]</td><td>KANNADA VOWEL SIGN OO</td></tr>
 <tr><td>0DDD</td><td>[1353]</td><td>[1352 1346]</td><td>SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA</td></tr>
 <tr><td>1FBD</td><td>[0217]</td><td>[0209]</td><td>GREEK KORONIS</td></tr>
 <tr><td>1FBF</td><td>[0217]</td><td>[0209]</td><td>GREEK PSILI</td></tr>
 <tr><td>1FC0</td><td>[021D]</td><td>[0209]</td><td>GREEK PERISPOMENI</td></tr>
 <tr><td>1FC1</td><td>[0214]</td><td>[0209]</td><td>GREEK DIALYTIKA AND PERISPOMENI</td></tr>
 <tr><td>1FCD</td><td>[0217]</td><td>[0209]</td><td>GREEK PSILI AND VARIA</td></tr>
 <tr><td>1FCE</td><td>[0217]</td><td>[0209]</td><td>GREEK PSILI AND OXIA</td></tr>
 <tr><td>1FCF</td><td>[0217]</td><td>[0209]</td><td>GREEK PSILI AND PERISPOMENI</td></tr>
 <tr><td>1FDD</td><td>[0218]</td><td>[0209]</td><td>GREEK DASIA AND VARIA</td></tr>
 <tr><td>1FDE</td><td>[0218]</td><td>[0209]</td><td>GREEK DASIA AND OXIA</td></tr>
 <tr><td>1FDF</td><td>[0218]</td><td>[0209]</td><td>GREEK DASIA AND PERISPOMENI</td></tr>
 <tr><td>1FED</td><td>[0214]</td><td>[0209]</td><td>GREEK DIALYTIKA AND VARIA</td></tr>
 <tr><td>1FEE</td><td>[0214]</td><td>[0209]</td><td>GREEK DIALYTIKA AND OXIA</td></tr>
 <tr><td>1FFD</td><td>[020D]</td><td>[0209]</td><td>GREEK OXIA</td></tr>
 <tr><td>1FFE</td><td>[0218]</td><td>[0209]</td><td>GREEK DASIA</td></tr>
 <tr><td>2017</td><td>[021C]</td><td>[0209]</td><td>DOUBLE LOW LINE</td></tr>
 <tr><td>203E</td><td>[0211]</td><td>[0209]</td><td>OVERLINE</td></tr>
 <tr><td>2047</td><td>[FFC0 A047]</td><td>[024E 024E]</td><td>DOUBLE QUESTION MARK</td></tr>
 <tr><td>2057</td><td>[FFC0 A057]</td><td>[02B6 02B6 02B6 02B6]</td><td>QUADRUPLE PRIME</td></tr>
 <tr><td>205F</td><td>[FFC0 A05F]</td><td>[0209]</td><td>MEDIUM MATHEMATICAL SPACE</td></tr>
 <tr><td>2071</td><td>[FFC0 A071]</td><td>[0AD3]</td><td>SUPERSCRIPT LATIN SMALL LETTER I</td></tr>
 <tr><td>213D</td><td>[FFC0 A13D]</td><td>[0C93]</td><td>DOUBLE-STRUCK SMALL GAMMA</td></tr>
 <tr><td>213E</td><td>[FFC0 A13E]</td><td>[0C93]</td><td>DOUBLE-STRUCK CAPITAL GAMMA</td></tr>
 <tr><td>213F</td><td>[FFC0 A13F]</td><td>[0CA3]</td><td>DOUBLE-STRUCK CAPITAL PI</td></tr>
 <tr><td>2140</td><td>[FFC0 A140]</td><td>[039E]</td><td>DOUBLE-STRUCK N-ARY SUMMATION</td></tr>
 <tr><td>2145</td><td>[FFC0 A145]</td><td>[0A49]</td><td>DOUBLE-STRUCK ITALIC CAPITAL D</td></tr>
 <tr><td>2146</td><td>[FFC0 A146]</td><td>[0A49]</td><td>DOUBLE-STRUCK ITALIC SMALL D</td></tr>
 <tr><td>2147</td><td>[FFC0 A147]</td><td>[0A65]</td><td>DOUBLE-STRUCK ITALIC SMALL E</td></tr>
 <tr><td>2148</td><td>[FFC0 A148]</td><td>[0AD3]</td><td>DOUBLE-STRUCK ITALIC SMALL I</td></tr>
 <tr><td>2149</td><td>[FFC0 A149]</td><td>[0AE7]</td><td>DOUBLE-STRUCK ITALIC SMALL J</td></tr>
 <tr><td>2A0C</td><td>[FFC0 AA0C]</td><td>[03C2 03C2 03C2 03C2]</td><td>QUADRUPLE INTEGRAL OPERATOR</td></tr>
 <tr><td>2A74</td><td>[FFC0 AA74]</td><td>[0237 0237 03A4]</td><td>DOUBLE COLON EQUAL</td></tr>
 <tr><td>2A75</td><td>[FFC0 AA75]</td><td>[03A4 03A4]</td><td>TWO CONSECUTIVE EQUALS SIGNS</td></tr>
 <tr><td>2A76</td><td>[FFC0 AA76]</td><td>[03A4 03A4 03A4]</td><td>THREE CONSECUTIVE EQUALS SIGNS</td></tr>
 <tr><td>2ADC</td><td>[FFC0 AADC]</td><td>[FFC0 AADD]</td><td>FORKING</td></tr>
 <tr><td>309B</td><td>[021E]</td><td>[0209]</td><td>KATAKANA-HIRAGANA VOICED SOUND MARK</td></tr>
 <tr><td>309C</td><td>[021F]</td><td>[0209]</td><td>KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK</td></tr>
 <tr><td>309F</td><td>[FFC0 B09F]</td><td>[1946 1948]</td><td>HIRAGANA DIGRAPH YORI</td></tr>
 <tr><td>30FF</td><td>[FFC0 B0FF]</td><td>[192A 1934]</td><td>KATAKANA DIGRAPH KOTO</td></tr>
 <tr><td>3251</td><td>[FFC0 B251]</td><td>[0A0D 0A0C]</td><td>CIRCLED NUMBER TWENTY ONE</td></tr>
 <tr><td>3252</td><td>[FFC0 B252]</td><td>[0A0D 0A0D]</td><td>CIRCLED NUMBER TWENTY TWO</td></tr>
 <tr><td>3253</td><td>[FFC0 B253]</td><td>[0A0D 0A0E]</td><td>CIRCLED NUMBER TWENTY THREE</td></tr>
 <tr><td>3254</td><td>[FFC0 B254]</td><td>[0A0D 0A0F]</td><td>CIRCLED NUMBER TWENTY FOUR</td></tr>
 <tr><td>3255</td><td>[FFC0 B255]</td><td>[0A0D 0A10]</td><td>CIRCLED NUMBER TWENTY FIVE</td></tr>
 <tr><td>3256</td><td>[FFC0 B256]</td><td>[0A0D 0A11]</td><td>CIRCLED NUMBER TWENTY SIX</td></tr>
 <tr><td>3257</td><td>[FFC0 B257]</td><td>[0A0D 0A12]</td><td>CIRCLED NUMBER TWENTY SEVEN</td></tr>
 <tr><td>3258</td><td>[FFC0 B258]</td><td>[0A0D 0A13]</td><td>CIRCLED NUMBER TWENTY EIGHT</td></tr>
 <tr><td>3259</td><td>[FFC0 B259]</td><td>[0A0D 0A14]</td><td>CIRCLED NUMBER TWENTY NINE</td></tr>
 <tr><td>325A</td><td>[FFC0 B25A]</td><td>[0A0E 0A0B]</td><td>CIRCLED NUMBER THIRTY</td></tr>
 <tr><td>325B</td><td>[FFC0 B25B]</td><td>[0A0E 0A0C]</td><td>CIRCLED NUMBER THIRTY ONE</td></tr>
 <tr><td>325C</td><td>[FFC0 B25C]</td><td>[0A0E 0A0D]</td><td>CIRCLED NUMBER THIRTY TWO</td></tr>
 <tr><td>325D</td><td>[FFC0 B25D]</td><td>[0A0E 0A0E]</td><td>CIRCLED NUMBER THIRTY THREE</td></tr>
 <tr><td>325E</td><td>[FFC0 B25E]</td><td>[0A0E 0A0F]</td><td>CIRCLED NUMBER THIRTY FOUR</td></tr>
 <tr><td>325F</td><td>[FFC0 B25F]</td><td>[0A0E 0A10]</td><td>CIRCLED NUMBER THIRTY FIVE</td></tr>
 <tr><td>32B1</td><td>[FFC0 B2B1]</td><td>[0A0E 0A11]</td><td>CIRCLED NUMBER THIRTY SIX</td></tr>
 <tr><td>32B2</td><td>[FFC0 B2B2]</td><td>[0A0E 0A12]</td><td>CIRCLED NUMBER THIRTY SEVEN</td></tr>
 <tr><td>32B3</td><td>[FFC0 B2B3]</td><td>[0A0E 0A13]</td><td>CIRCLED NUMBER THIRTY EIGHT</td></tr>
 <tr><td>32B4</td><td>[FFC0 B2B4]</td><td>[0A0E 0A14]</td><td>CIRCLED NUMBER THIRTY NINE</td></tr>
 <tr><td>32B5</td><td>[FFC0 B2B5]</td><td>[0A0F 0A0B]</td><td>CIRCLED NUMBER FORTY</td></tr>
 <tr><td>32B6</td><td>[FFC0 B2B6]</td><td>[0A0F 0A0C]</td><td>CIRCLED NUMBER FORTY ONE</td></tr>
 <tr><td>32B7</td><td>[FFC0 B2B7]</td><td>[0A0F 0A0D]</td><td>CIRCLED NUMBER FORTY TWO</td></tr>
 <tr><td>32B8</td><td>[FFC0 B2B8]</td><td>[0A0F 0A0E]</td><td>CIRCLED NUMBER FORTY THREE</td></tr>
 <tr><td>32B9</td><td>[FFC0 B2B9]</td><td>[0A0F 0A0F]</td><td>CIRCLED NUMBER FORTY FOUR</td></tr>
 <tr><td>32BA</td><td>[FFC0 B2BA]</td><td>[0A0F 0A10]</td><td>CIRCLED NUMBER FORTY FIVE</td></tr>
 <tr><td>32BB</td><td>[FFC0 B2BB]</td><td>[0A0F 0A11]</td><td>CIRCLED NUMBER FORTY SIX</td></tr>
 <tr><td>32BC</td><td>[FFC0 B2BC]</td><td>[0A0F 0A12]</td><td>CIRCLED NUMBER FORTY SEVEN</td></tr>
 <tr><td>32BD</td><td>[FFC0 B2BD]</td><td>[0A0F 0A13]</td><td>CIRCLED NUMBER FORTY EIGHT</td></tr>
 <tr><td>32BE</td><td>[FFC0 B2BE]</td><td>[0A0F 0A14]</td><td>CIRCLED NUMBER FORTY NINE</td></tr>
 <tr><td>32BF</td><td>[FFC0 B2BF]</td><td>[0A10 0A0B]</td><td>CIRCLED NUMBER FIFTY</td></tr>
 <tr><td>F951</td><td>[FF41 96FB]</td><td>[FF41 964B]</td><td>CJK COMPATIBILITY IDEOGRAPH-F951</td></tr>
 <tr><td>FA30</td><td>[FFC1 FA30]</td><td>[FF40 CFAE]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA30</td></tr>
 <tr><td>FA31</td><td>[FFC1 FA31]</td><td>[FF40 D0E7]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA31</td></tr>
 <tr><td>FA32</td><td>[FFC1 FA32]</td><td>[FF40 D14D]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA32</td></tr>
 <tr><td>FA33</td><td>[FFC1 FA33]</td><td>[FF40 D2C9]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA33</td></tr>
 <tr><td>FA34</td><td>[FFC1 FA34]</td><td>[FF40 D2E4]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA34</td></tr>
 <tr><td>FA35</td><td>[FFC1 FA35]</td><td>[FF40 D351]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA35</td></tr>
 <tr><td>FA36</td><td>[FFC1 FA36]</td><td>[FF40 D59D]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA36</td></tr>
 <tr><td>FA37</td><td>[FFC1 FA37]</td><td>[FF40 D606]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA37</td></tr>
 <tr><td>FA38</td><td>[FFC1 FA38]</td><td>[FF40 D668]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA38</td></tr>
 <tr><td>FA39</td><td>[FFC1 FA39]</td><td>[FF40 D840]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA39</td></tr>
 <tr><td>FA3A</td><td>[FFC1 FA3A]</td><td>[FF40 D8A8]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA3A</td></tr>
 <tr><td>FA3B</td><td>[FFC1 FA3B]</td><td>[FF40 DC64]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA3B</td></tr>
 <tr><td>FA3C</td><td>[FFC1 FA3C]</td><td>[FF40 DC6E]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA3C</td></tr>
 <tr><td>FA3D</td><td>[FFC1 FA3D]</td><td>[FF40 E094]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA3D</td></tr>
 <tr><td>FA3E</td><td>[FFC1 FA3E]</td><td>[FF40 E168]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA3E</td></tr>
 <tr><td>FA3F</td><td>[FFC1 FA3F]</td><td>[FF40 E18E]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA3F</td></tr>
 <tr><td>FA40</td><td>[FFC1 FA40]</td><td>[FF40 E1F2]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA40</td></tr>
 <tr><td>FA41</td><td>[FFC1 FA41]</td><td>[FF40 E54F]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA41</td></tr>
 <tr><td>FA42</td><td>[FFC1 FA42]</td><td>[FF40 E5E2]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA42</td></tr>
 <tr><td>FA43</td><td>[FFC1 FA43]</td><td>[FF40 E691]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA43</td></tr>
 <tr><td>FA44</td><td>[FFC1 FA44]</td><td>[FF40 E885]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA44</td></tr>
 <tr><td>FA45</td><td>[FFC1 FA45]</td><td>[FF40 ED77]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA45</td></tr>
 <tr><td>FA46</td><td>[FFC1 FA46]</td><td>[FF40 EE1A]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA46</td></tr>
 <tr><td>FA47</td><td>[FFC1 FA47]</td><td>[FF40 EF22]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA47</td></tr>
 <tr><td>FA48</td><td>[FFC1 FA48]</td><td>[FF40 F16E]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA48</td></tr>
 <tr><td>FA49</td><td>[FFC1 FA49]</td><td>[FF40 F22B]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA49</td></tr>
 <tr><td>FA4A</td><td>[FFC1 FA4A]</td><td>[FF40 F422]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA4A</td></tr>
 <tr><td>FA4B</td><td>[FFC1 FA4B]</td><td>[FF40 F891]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA4B</td></tr>
 <tr><td>FA4C</td><td>[FFC1 FA4C]</td><td>[FF40 F93E]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA4C</td></tr>
 <tr><td>FA4D</td><td>[FFC1 FA4D]</td><td>[FF40 F949]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA4D</td></tr>
 <tr><td>FA4E</td><td>[FFC1 FA4E]</td><td>[FF40 F948]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA4E</td></tr>
 <tr><td>FA4F</td><td>[FFC1 FA4F]</td><td>[FF40 F950]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA4F</td></tr>
 <tr><td>FA50</td><td>[FFC1 FA50]</td><td>[FF40 F956]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA50</td></tr>
 <tr><td>FA51</td><td>[FFC1 FA51]</td><td>[FF40 F95D]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA51</td></tr>
 <tr><td>FA52</td><td>[FFC1 FA52]</td><td>[FF40 F98D]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA52</td></tr>
 <tr><td>FA53</td><td>[FFC1 FA53]</td><td>[FF40 F98E]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA53</td></tr>
 <tr><td>FA54</td><td>[FFC1 FA54]</td><td>[FF40 FA40]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA54</td></tr>
 <tr><td>FA55</td><td>[FFC1 FA55]</td><td>[FF40 FA81]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA55</td></tr>
 <tr><td>FA56</td><td>[FFC1 FA56]</td><td>[FF40 FBC0]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA56</td></tr>
 <tr><td>FA57</td><td>[FFC1 FA57]</td><td>[FF40 FDF4]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA57</td></tr>
 <tr><td>FA58</td><td>[FFC1 FA58]</td><td>[FF40 FE09]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA58</td></tr>
 <tr><td>FA59</td><td>[FFC1 FA59]</td><td>[FF40 FE41]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA59</td></tr>
 <tr><td>FA5A</td><td>[FFC1 FA5A]</td><td>[FF40 FF72]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA5A</td></tr>
 <tr><td>FA5B</td><td>[FFC1 FA5B]</td><td>[FF41 8005]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA5B</td></tr>
 <tr><td>FA5C</td><td>[FFC1 FA5C]</td><td>[FF41 81ED]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA5C</td></tr>
 <tr><td>FA5D</td><td>[FFC1 FA5D]</td><td>[FF41 8279]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA5D</td></tr>
 <tr><td>FA5E</td><td>[FFC1 FA5E]</td><td>[FF41 8279]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA5E</td></tr>
 <tr><td>FA5F</td><td>[FFC1 FA5F]</td><td>[FF41 8457]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA5F</td></tr>
 <tr><td>FA60</td><td>[FFC1 FA60]</td><td>[FF41 8910]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA60</td></tr>
 <tr><td>FA61</td><td>[FFC1 FA61]</td><td>[FF41 8996]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA61</td></tr>
 <tr><td>FA62</td><td>[FFC1 FA62]</td><td>[FF41 8B01]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA62</td></tr>
 <tr><td>FA63</td><td>[FFC1 FA63]</td><td>[FF41 8B39]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA63</td></tr>
 <tr><td>FA64</td><td>[FFC1 FA64]</td><td>[FF41 8CD3]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA64</td></tr>
 <tr><td>FA65</td><td>[FFC1 FA65]</td><td>[FF41 8D08]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA65</td></tr>
 <tr><td>FA66</td><td>[FFC1 FA66]</td><td>[FF41 8FB6]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA66</td></tr>
 <tr><td>FA67</td><td>[FFC1 FA67]</td><td>[FF41 9038]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA67</td></tr>
 <tr><td>FA68</td><td>[FFC1 FA68]</td><td>[FF41 96E3]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA68</td></tr>
 <tr><td>FA69</td><td>[FFC1 FA69]</td><td>[FF41 97FF]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA69</td></tr>
 <tr><td>FA6A</td><td>[FFC1 FA6A]</td><td>[FF41 983B]</td><td>CJK COMPATIBILITY IDEOGRAPH-FA6A</td></tr>
 <tr><td>FC5E</td><td>[]</td><td>[0209]</td><td>ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM</td></tr>
 <tr><td>FC5F</td><td>[]</td><td>[0209]</td><td>ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM</td></tr>
 <tr><td>FC60</td><td>[]</td><td>[0209]</td><td>ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM</td></tr>
 <tr><td>FC61</td><td>[]</td><td>[0209]</td><td>ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM</td></tr>
 <tr><td>FC62</td><td>[]</td><td>[0209]</td><td>ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM</td></tr>
 <tr><td>FC63</td><td>[]</td><td>[0209]</td><td>ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM</td></tr>
 <tr><td>FCF2</td><td>[]</td><td>[020B]</td><td>ARABIC LIGATURE SHADDA WITH FATHA MEDIAL FORM</td></tr>
 <tr><td>FCF3</td><td>[]</td><td>[020B]</td><td>ARABIC LIGATURE SHADDA WITH DAMMA MEDIAL FORM</td></tr>
 <tr><td>FCF4</td><td>[]</td><td>[020B]</td><td>ARABIC LIGATURE SHADDA WITH KASRA MEDIAL FORM</td></tr>
 <tr><td>FDFC</td><td>[FFC1 FDFC]</td><td>[0EF9 0F4A 0ED6 0F2D]</td><td>RIAL SIGN</td></tr>
 <tr><td>FE49</td><td>[0211]</td><td>[0209]</td><td>DASHED OVERLINE</td></tr>
 <tr><td>FE4A</td><td>[0211]</td><td>[0209]</td><td>CENTRELINE OVERLINE</td></tr>
 <tr><td>FE4B</td><td>[0211]</td><td>[0209]</td><td>WAVY OVERLINE</td></tr>
 <tr><td>FE4C</td><td>[0211]</td><td>[0209]</td><td>DOUBLE WAVY OVERLINE</td></tr>
 <tr><td>FE70</td><td>[]</td><td>[0209]</td><td>ARABIC FATHATAN ISOLATED FORM</td></tr>
 <tr><td>FE71</td><td>[]</td><td>[020B]</td><td>ARABIC TATWEEL WITH FATHATAN ABOVE</td></tr>
 <tr><td>FE72</td><td>[]</td><td>[0209]</td><td>ARABIC DAMMATAN ISOLATED FORM</td></tr>
 <tr><td>FE74</td><td>[]</td><td>[0209]</td><td>ARABIC KASRATAN ISOLATED FORM</td></tr>
 <tr><td>FE76</td><td>[]</td><td>[0209]</td><td>ARABIC FATHA ISOLATED FORM</td></tr>
 <tr><td>FE77</td><td>[]</td><td>[020B]</td><td>ARABIC FATHA MEDIAL FORM</td></tr>
 <tr><td>FE78</td><td>[]</td><td>[0209]</td><td>ARABIC DAMMA ISOLATED FORM</td></tr>
 <tr><td>FE79</td><td>[]</td><td>[020B]</td><td>ARABIC DAMMA MEDIAL FORM</td></tr>
 <tr><td>FE7A</td><td>[]</td><td>[0209]</td><td>ARABIC KASRA ISOLATED FORM</td></tr>
 <tr><td>FE7B</td><td>[]</td><td>[020B]</td><td>ARABIC KASRA MEDIAL FORM</td></tr>
 <tr><td>FE7C</td><td>[]</td><td>[0209]</td><td>ARABIC SHADDA ISOLATED FORM</td></tr>
 <tr><td>FE7D</td><td>[]</td><td>[020B]</td><td>ARABIC SHADDA MEDIAL FORM</td></tr>
 <tr><td>FE7E</td><td>[]</td><td>[0209]</td><td>ARABIC SUKUN ISOLATED FORM</td></tr>
 <tr><td>FE7F</td><td>[]</td><td>[020B]</td><td>ARABIC SUKUN MEDIAL FORM</td></tr>
 <tr><td>FF5F</td><td>[FFC1 FF5F]</td><td>[FFC0 A985]</td><td>FULLWIDTH LEFT WHITE PARENTHESIS</td></tr>
 <tr><td>FF60</td><td>[FFC1 FF60]</td><td>[FFC0 A986]</td><td>FULLWIDTH RIGHT WHITE PARENTHESIS</td></tr>
 <tr><td>FFE3</td><td>[0210]</td><td>[0209]</td><td>FULLWIDTH MACRON</td></tr>
 </table>
 <h2>4. Secondaries Incompatible with Decompositions</h2><table border='1'>
 <tr><th>Code</th><th>Sort Key</th><th>Decomposed Sort Key</th><th>Name</th></tr>
 <tr><td>00A8</td><td>[0214 | 0020]</td><td>[0209 | 0020 0047]</td><td>DIAERESIS</td></tr>
 <tr><td>00AF</td><td>[0210 | 0020]</td><td>[0209 | 0020 005A]</td><td>MACRON</td></tr>
 <tr><td>00B4</td><td>[020D | 0020]</td><td>[0209 | 0020 0032]</td><td>ACUTE ACCENT</td></tr>
 <tr><td>00B8</td><td>[0219 | 0020]</td><td>[0209 | 0020 0055]</td><td>CEDILLA</td></tr>
 <tr><td>017F</td><td>[0BA7 | 0020 0154]</td><td>[0BA7 | 0020]</td><td>LATIN SMALL LETTER LONG S</td></tr>
 <tr><td>02D8</td><td>[0212 | 0020]</td><td>[0209 | 0020 0037]</td><td>BREVE</td></tr>
 <tr><td>02D9</td><td>[0213 | 0020]</td><td>[0209 | 0020 0052]</td><td>DOT ABOVE</td></tr>
 <tr><td>02DA</td><td>[0215 | 0020]</td><td>[0209 | 0020 0043]</td><td>RING ABOVE</td></tr>
 <tr><td>02DB</td><td>[021A | 0020]</td><td>[0209 | 0020 0058]</td><td>OGONEK</td></tr>
 <tr><td>02DC</td><td>[020E | 0020]</td><td>[0209 | 0020 004E]</td><td>SMALL TILDE</td></tr>
 <tr><td>02DD</td><td>[0216 | 0020]</td><td>[0209 | 0020 004D]</td><td>DOUBLE ACUTE ACCENT</td></tr>
 <tr><td>037A</td><td>[0C9B | 0020]</td><td>[0209 | 0020 0096]</td><td>GREEK YPOGEGRAMMENI</td></tr>
 <tr><td>0384</td><td>[020D | 0020]</td><td>[0209 | 0020 0032]</td><td>GREEK TONOS</td></tr>
 <tr><td>0385</td><td>[0214 | 0020 0032]</td><td>[0209 | 0020 0047 0032]</td><td>GREEK DIALYTIKA TONOS</td></tr>
 <tr><td>1E9B</td><td>[0BA7 | 0020 0154 0052]</td><td>[0BA7 | 0020 0052]</td><td>LATIN SMALL LETTER LONG S WITH DOT ABOVE</td></tr>
 <tr><td>1FBD</td><td>[0217 | 0020]</td><td>[0209 | 0020 0022]</td><td>GREEK KORONIS</td></tr>
 <tr><td>1FBF</td><td>[0217 | 0020]</td><td>[0209 | 0020 0022]</td><td>GREEK PSILI</td></tr>
 <tr><td>1FC0</td><td>[021D | 0020]</td><td>[0209 | 0020 0045]</td><td>GREEK PERISPOMENI</td></tr>
 <tr><td>1FC1</td><td>[0214 | 0020 0045]</td><td>[0209 | 0020 0047 0045]</td><td>GREEK DIALYTIKA AND PERISPOMENI</td></tr>
 <tr><td>1FCD</td><td>[0217 | 0020 0035]</td><td>[0209 | 0020 0022 0035]</td><td>GREEK PSILI AND VARIA</td></tr>
 <tr><td>1FCE</td><td>[0217 | 0020 0032]</td><td>[0209 | 0020 0022 0032]</td><td>GREEK PSILI AND OXIA</td></tr>
 <tr><td>1FCF</td><td>[0217 | 0020 0045]</td><td>[0209 | 0020 0022 0045]</td><td>GREEK PSILI AND PERISPOMENI</td></tr>
 <tr><td>1FDD</td><td>[0218 | 0020 0035]</td><td>[0209 | 0020 002A 0035]</td><td>GREEK DASIA AND VARIA</td></tr>
 <tr><td>1FDE</td><td>[0218 | 0020 0032]</td><td>[0209 | 0020 002A 0032]</td><td>GREEK DASIA AND OXIA</td></tr>
 <tr><td>1FDF</td><td>[0218 | 0020 0045]</td><td>[0209 | 0020 002A 0045]</td><td>GREEK DASIA AND PERISPOMENI</td></tr>
 <tr><td>1FED</td><td>[0214 | 0020 0035]</td><td>[0209 | 0020 0047 0035]</td><td>GREEK DIALYTIKA AND VARIA</td></tr>
 <tr><td>1FEE</td><td>[0214 | 0020 0032]</td><td>[0209 | 0020 0047 0032]</td><td>GREEK DIALYTIKA AND OXIA</td></tr>
 <tr><td>1FFD</td><td>[020D | 0020]</td><td>[0209 | 0020 0032]</td><td>GREEK OXIA</td></tr>
 <tr><td>1FFE</td><td>[0218 | 0020]</td><td>[0209 | 0020 002A]</td><td>GREEK DASIA</td></tr>
 <tr><td>2017</td><td>[021C | 0020]</td><td>[0209 | 0020 008A]</td><td>DOUBLE LOW LINE</td></tr>
 <tr><td>203E</td><td>[0211 | 0020]</td><td>[0209 | 0020 005E]</td><td>OVERLINE</td></tr>
 <tr><td>2047</td><td>[FFC0 A047 | 0020 0020]</td><td>[024E 024E | 0020 0020]</td><td>DOUBLE QUESTION MARK</td></tr>
 <tr><td>2057</td><td>[FFC0 A057 | 0020 0020]</td><td>[02B6 02B6 02B6 02B6 | 0020 0020 0020 0020]</td><td>QUADRUPLE PRIME</td></tr>
 <tr><td>205F</td><td>[FFC0 A05F | 0020 0020]</td><td>[0209 | 0020]</td><td>MEDIUM MATHEMATICAL SPACE</td></tr>
 <tr><td>2071</td><td>[FFC0 A071 | 0020 0020]</td><td>[0AD3 | 0020]</td><td>SUPERSCRIPT LATIN SMALL LETTER I</td></tr>
 <tr><td>213D</td><td>[FFC0 A13D | 0020 0020]</td><td>[0C93 | 0020]</td><td>DOUBLE-STRUCK SMALL GAMMA</td></tr>
 <tr><td>213E</td><td>[FFC0 A13E | 0020 0020]</td><td>[0C93 | 0020]</td><td>DOUBLE-STRUCK CAPITAL GAMMA</td></tr>
 <tr><td>213F</td><td>[FFC0 A13F | 0020 0020]</td><td>[0CA3 | 0020]</td><td>DOUBLE-STRUCK CAPITAL PI</td></tr>
 <tr><td>2140</td><td>[FFC0 A140 | 0020 0020]</td><td>[039E | 0020]</td><td>DOUBLE-STRUCK N-ARY SUMMATION</td></tr>
 <tr><td>2145</td><td>[FFC0 A145 | 0020 0020]</td><td>[0A49 | 0020]</td><td>DOUBLE-STRUCK ITALIC CAPITAL D</td></tr>
 <tr><td>2146</td><td>[FFC0 A146 | 0020 0020]</td><td>[0A49 | 0020]</td><td>DOUBLE-STRUCK ITALIC SMALL D</td></tr>
 <tr><td>2147</td><td>[FFC0 A147 | 0020 0020]</td><td>[0A65 | 0020]</td><td>DOUBLE-STRUCK ITALIC SMALL E</td></tr>
 <tr><td>2148</td><td>[FFC0 A148 | 0020 0020]</td><td>[0AD3 | 0020]</td><td>DOUBLE-STRUCK ITALIC SMALL I</td></tr>
 <tr><td>2149</td><td>[FFC0 A149 | 0020 0020]</td><td>[0AE7 | 0020]</td><td>DOUBLE-STRUCK ITALIC SMALL J</td></tr>
 <tr><td>2A0C</td><td>[FFC0 AA0C | 0020 0020]</td><td>[03C2 03C2 03C2 03C2 | 0020 0020 0020 0020]</td><td>QUADRUPLE INTEGRAL OPERATOR</td></tr>
 <tr><td>2A74</td><td>[FFC0 AA74 | 0020 0020]</td><td>[0237 0237 03A4 | 0020 0020 0020]</td><td>DOUBLE COLON EQUAL</td></tr>
 <tr><td>2A75</td><td>[FFC0 AA75 | 0020 0020]</td><td>[03A4 03A4 | 0020 0020]</td><td>TWO CONSECUTIVE EQUALS SIGNS</td></tr>
 <tr><td>2A76</td><td>[FFC0 AA76 | 0020 0020]</td><td>[03A4 03A4 03A4 | 0020 0020 0020]</td><td>THREE CONSECUTIVE EQUALS SIGNS</td></tr>
 <tr><td>309B</td><td>[021E | 0020]</td><td>[0209 | 0020 013D]</td><td>KATAKANA-HIRAGANA VOICED SOUND MARK</td></tr>
 <tr><td>309C</td><td>[021F | 0020]</td><td>[0209 | 0020 013E]</td><td>KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK</td></tr>
 <tr><td>309F</td><td>[FFC0 B09F | 0020 0020]</td><td>[1946 1948 | 0020 0020]</td><td>HIRAGANA DIGRAPH YORI</td></tr>
 <tr><td>30FF</td><td>[FFC0 B0FF | 0020 0020]</td><td>[192A 1934 | 0020 0020]</td><td>KATAKANA DIGRAPH KOTO</td></tr>
 <tr><td>3251</td><td>[FFC0 B251 | 0020 0020]</td><td>[0A0D 0A0C | 0020 0020]</td><td>CIRCLED NUMBER TWENTY ONE</td></tr>
 <tr><td>3252</td><td>[FFC0 B252 | 0020 0020]</td><td>[0A0D 0A0D | 0020 0020]</td><td>CIRCLED NUMBER TWENTY TWO</td></tr>
 <tr><td>3253</td><td>[FFC0 B253 | 0020 0020]</td><td>[0A0D 0A0E | 0020 0020]</td><td>CIRCLED NUMBER TWENTY THREE</td></tr>
 <tr><td>3254</td><td>[FFC0 B254 | 0020 0020]</td><td>[0A0D 0A0F | 0020 0020]</td><td>CIRCLED NUMBER TWENTY FOUR</td></tr>
 <tr><td>3255</td><td>[FFC0 B255 | 0020 0020]</td><td>[0A0D 0A10 | 0020 0020]</td><td>CIRCLED NUMBER TWENTY FIVE</td></tr>
 <tr><td>3256</td><td>[FFC0 B256 | 0020 0020]</td><td>[0A0D 0A11 | 0020 0020]</td><td>CIRCLED NUMBER TWENTY SIX</td></tr>
 <tr><td>3257</td><td>[FFC0 B257 | 0020 0020]</td><td>[0A0D 0A12 | 0020 0020]</td><td>CIRCLED NUMBER TWENTY SEVEN</td></tr>
 <tr><td>3258</td><td>[FFC0 B258 | 0020 0020]</td><td>[0A0D 0A13 | 0020 0020]</td><td>CIRCLED NUMBER TWENTY EIGHT</td></tr>
 <tr><td>3259</td><td>[FFC0 B259 | 0020 0020]</td><td>[0A0D 0A14 | 0020 0020]</td><td>CIRCLED NUMBER TWENTY NINE</td></tr>
 <tr><td>325A</td><td>[FFC0 B25A | 0020 0020]</td><td>[0A0E 0A0B | 0020 0020]</td><td>CIRCLED NUMBER THIRTY</td></tr>
 <tr><td>325B</td><td>[FFC0 B25B | 0020 0020]</td><td>[0A0E 0A0C | 0020 0020]</td><td>CIRCLED NUMBER THIRTY ONE</td></tr>
 <tr><td>325C</td><td>[FFC0 B25C | 0020 0020]</td><td>[0A0E 0A0D | 0020 0020]</td><td>CIRCLED NUMBER THIRTY TWO</td></tr>
 <tr><td>325D</td><td>[FFC0 B25D | 0020 0020]</td><td>[0A0E 0A0E | 0020 0020]</td><td>CIRCLED NUMBER THIRTY THREE</td></tr>
 <tr><td>325E</td><td>[FFC0 B25E | 0020 0020]</td><td>[0A0E 0A0F | 0020 0020]</td><td>CIRCLED NUMBER THIRTY FOUR</td></tr>
 <tr><td>325F</td><td>[FFC0 B25F | 0020 0020]</td><td>[0A0E 0A10 | 0020 0020]</td><td>CIRCLED NUMBER THIRTY FIVE</td></tr>
 <tr><td>32B1</td><td>[FFC0 B2B1 | 0020 0020]</td><td>[0A0E 0A11 | 0020 0020]</td><td>CIRCLED NUMBER THIRTY SIX</td></tr>
 <tr><td>32B2</td><td>[FFC0 B2B2 | 0020 0020]</td><td>[0A0E 0A12 | 0020 0020]</td><td>CIRCLED NUMBER THIRTY SEVEN</td></tr>
 <tr><td>32B3</td><td>[FFC0 B2B3 | 0020 0020]</td><td>[0A0E 0A13 | 0020 0020]</td><td>CIRCLED NUMBER THIRTY EIGHT</td></tr>
 <tr><td>32B4</td><td>[FFC0 B2B4 | 0020 0020]</td><td>[0A0E 0A14 | 0020 0020]</td><td>CIRCLED NUMBER THIRTY NINE</td></tr>
 <tr><td>32B5</td><td>[FFC0 B2B5 | 0020 0020]</td><td>[0A0F 0A0B | 0020 0020]</td><td>CIRCLED NUMBER FORTY</td></tr>
 <tr><td>32B6</td><td>[FFC0 B2B6 | 0020 0020]</td><td>[0A0F 0A0C | 0020 0020]</td><td>CIRCLED NUMBER FORTY ONE</td></tr>
 <tr><td>32B7</td><td>[FFC0 B2B7 | 0020 0020]</td><td>[0A0F 0A0D | 0020 0020]</td><td>CIRCLED NUMBER FORTY TWO</td></tr>
 <tr><td>32B8</td><td>[FFC0 B2B8 | 0020 0020]</td><td>[0A0F 0A0E | 0020 0020]</td><td>CIRCLED NUMBER FORTY THREE</td></tr>
 <tr><td>32B9</td><td>[FFC0 B2B9 | 0020 0020]</td><td>[0A0F 0A0F | 0020 0020]</td><td>CIRCLED NUMBER FORTY FOUR</td></tr>
 <tr><td>32BA</td><td>[FFC0 B2BA | 0020 0020]</td><td>[0A0F 0A10 | 0020 0020]</td><td>CIRCLED NUMBER FORTY FIVE</td></tr>
 <tr><td>32BB</td><td>[FFC0 B2BB | 0020 0020]</td><td>[0A0F 0A11 | 0020 0020]</td><td>CIRCLED NUMBER FORTY SIX</td></tr>
 <tr><td>32BC</td><td>[FFC0 B2BC | 0020 0020]</td><td>[0A0F 0A12 | 0020 0020]</td><td>CIRCLED NUMBER FORTY SEVEN</td></tr>
 <tr><td>32BD</td><td>[FFC0 B2BD | 0020 0020]</td><td>[0A0F 0A13 | 0020 0020]</td><td>CIRCLED NUMBER FORTY EIGHT</td></tr>
 <tr><td>32BE</td><td>[FFC0 B2BE | 0020 0020]</td><td>[0A0F 0A14 | 0020 0020]</td><td>CIRCLED NUMBER FORTY NINE</td></tr>
 <tr><td>32BF</td><td>[FFC0 B2BF | 0020 0020]</td><td>[0A10 0A0B | 0020 0020]</td><td>CIRCLED NUMBER FIFTY</td></tr>
 <tr><td>FB05</td><td>[0BA7 0BBF | 0020 0154 0020]</td><td>[0BA7 0BBF | 0020 0020]</td><td>LATIN SMALL LIGATURE LONG S T</td></tr>
 <tr><td>FBA4</td><td>[0F3D | 00CC]</td><td>[0F3D | 0020 00CC]</td><td>ARABIC LETTER HEH WITH YEH ABOVE ISOLATED FORM</td></tr>
 <tr><td>FBA5</td><td>[0F3D | 00CC]</td><td>[0F3D | 0020 00CC]</td><td>ARABIC LETTER HEH WITH YEH ABOVE FINAL FORM</td></tr>
 <tr><td>FBB0</td><td>[0F4F | 00CC]</td><td>[0F4F | 0020 00CC]</td><td>ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM</td></tr>
 <tr><td>FBB1</td><td>[0F4F | 00CC]</td><td>[0F4F | 0020 00CC]</td><td>ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM</td></tr>
 <tr><td>FC5E</td><td>[| 00C8]</td><td>[0209 | 0020 00BE 00C8]</td><td>ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM</td></tr>
 <tr><td>FC5F</td><td>[| 00C8]</td><td>[0209 | 0020 00C0 00C8]</td><td>ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM</td></tr>
 <tr><td>FC60</td><td>[| 00C8]</td><td>[0209 | 0020 00C2 00C8]</td><td>ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM</td></tr>
 <tr><td>FC61</td><td>[| 00C8]</td><td>[0209 | 0020 00C4 00C8]</td><td>ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM</td></tr>
 <tr><td>FC62</td><td>[| 00C8]</td><td>[0209 | 0020 00C6 00C8]</td><td>ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM</td></tr>
 <tr><td>FC63</td><td>[| 00C8 00CE]</td><td>[0209 | 0020 00C8 00CE]</td><td>ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM</td></tr>
 <tr><td>FCF2</td><td>[| 00C8]</td><td>[020B | 0020 00C2 00C8]</td><td>ARABIC LIGATURE SHADDA WITH FATHA MEDIAL FORM</td></tr>
 <tr><td>FCF3</td><td>[| 00C8]</td><td>[020B | 0020 00C4 00C8]</td><td>ARABIC LIGATURE SHADDA WITH DAMMA MEDIAL FORM</td></tr>
 <tr><td>FCF4</td><td>[| 00C8]</td><td>[020B | 0020 00C6 00C8]</td><td>ARABIC LIGATURE SHADDA WITH KASRA MEDIAL FORM</td></tr>
 <tr><td>FD3C</td><td>[0ED6 | 00BD]</td><td>[0ED6 | 0020 00BD]</td><td>ARABIC LIGATURE ALEF WITH FATHATAN FINAL FORM</td></tr>
 <tr><td>FD3D</td><td>[0ED6 | 00BD]</td><td>[0ED6 | 0020 00BD]</td><td>ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM</td></tr>
 <tr><td>FDFC</td><td>[FFC1 FDFC | 0020 0020]</td><td>[0EF9 0F4A 0ED6 0F2D | 0020 0020 0020 0020]</td><td>RIAL SIGN</td></tr>
 <tr><td>FE49</td><td>[0211 | 0020]</td><td>[0209 | 0020 005E]</td><td>DASHED OVERLINE</td></tr>
 <tr><td>FE4A</td><td>[0211 | 0020]</td><td>[0209 | 0020 005E]</td><td>CENTRELINE OVERLINE</td></tr>
 <tr><td>FE4B</td><td>[0211 | 0020]</td><td>[0209 | 0020 005E]</td><td>WAVY OVERLINE</td></tr>
 <tr><td>FE4C</td><td>[0211 | 0020]</td><td>[0209 | 0020 005E]</td><td>DOUBLE WAVY OVERLINE</td></tr>
 <tr><td>FE70</td><td>[| 00BD]</td><td>[0209 | 0020 00BD]</td><td>ARABIC FATHATAN ISOLATED FORM</td></tr>
 <tr><td>FE71</td><td>[| 00BD]</td><td>[020B | 0020 00BD]</td><td>ARABIC TATWEEL WITH FATHATAN ABOVE</td></tr>
 <tr><td>FE72</td><td>[| 00BE]</td><td>[0209 | 0020 00BE]</td><td>ARABIC DAMMATAN ISOLATED FORM</td></tr>
 <tr><td>FE74</td><td>[| 00C0]</td><td>[0209 | 0020 00C0]</td><td>ARABIC KASRATAN ISOLATED FORM</td></tr>
 <tr><td>FE76</td><td>[| 00C2]</td><td>[0209 | 0020 00C2]</td><td>ARABIC FATHA ISOLATED FORM</td></tr>
 <tr><td>FE77</td><td>[| 00C2]</td><td>[020B | 0020 00C2]</td><td>ARABIC FATHA MEDIAL FORM</td></tr>
 <tr><td>FE78</td><td>[| 00C4]</td><td>[0209 | 0020 00C4]</td><td>ARABIC DAMMA ISOLATED FORM</td></tr>
 <tr><td>FE79</td><td>[| 00C4]</td><td>[020B | 0020 00C4]</td><td>ARABIC DAMMA MEDIAL FORM</td></tr>
 <tr><td>FE7A</td><td>[| 00C6]</td><td>[0209 | 0020 00C6]</td><td>ARABIC KASRA ISOLATED FORM</td></tr>
 <tr><td>FE7B</td><td>[| 00C6]</td><td>[020B | 0020 00C6]</td><td>ARABIC KASRA MEDIAL FORM</td></tr>
 <tr><td>FE7C</td><td>[| 00C8]</td><td>[0209 | 0020 00C8]</td><td>ARABIC SHADDA ISOLATED FORM</td></tr>
 <tr><td>FE7D</td><td>[| 00C8]</td><td>[020B | 0020 00C8]</td><td>ARABIC SHADDA MEDIAL FORM</td></tr>
 <tr><td>FE7E</td><td>[| 00CA]</td><td>[0209 | 0020 00CA]</td><td>ARABIC SUKUN ISOLATED FORM</td></tr>
 <tr><td>FE7F</td><td>[| 00CA]</td><td>[020B | 0020 00CA]</td><td>ARABIC SUKUN MEDIAL FORM</td></tr>
 <tr><td>FF5F</td><td>[FFC1 FF5F | 0020 0020]</td><td>[FFC0 A985 | 0020 0020]</td><td>FULLWIDTH LEFT WHITE PARENTHESIS</td></tr>
 <tr><td>FF60</td><td>[FFC1 FF60 | 0020 0020]</td><td>[FFC0 A986 | 0020 0020]</td><td>FULLWIDTH RIGHT WHITE PARENTHESIS</td></tr>
 <tr><td>FFE3</td><td>[0210 | 0020]</td><td>[0209 | 0020 005A]</td><td>FULLWIDTH MACRON</td></tr>
 </table>
 </body></html>
--- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
+++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
@ -1,742 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/GenOverlap.java,v $ 
 * $Date: 2005/04/06 08:48:16 $ 
 * $Revision: 1.13 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCA;
 import java.util.*;
 import java.io.*;
 import com.ibm.text.UCD.*;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 public class GenOverlap implements UCD_Types, UCA_Types {
    // Shared lookup tables, filled by addString(). Raw (pre-generics) collections.
    // completes: CEList -> the string that was registered with that CE list (sorted by CEList).
    static Map completes = new TreeMap();
    // back: string -> its full CEList (the reverse direction of completes).
    static Map back = new HashMap();
    // initials: CEList returned by currCEList.start(i) -> Set of strings registered with that
    // longer CE list. NOTE(review): start(i) is presumably the leading i elements -- confirm in CEList.
    static Map initials = new HashMap();
    // Scratch buffer that collator.getCEs(...) and UCAContents.next(...) write collation elements
    // into; it is shared by every method in this class, so results must be wrapped in a CEList
    // before the next call overwrites it.
    static int[] ces = new int[50];
    // The following are (re)assigned by each public entry point before use.
    static UCA collator;
    static UCD ucd;
    static Normalizer nfd;
    static Normalizer nfkd;
    /**
     * Walks every code point from U+0000 through U+10FFFF and, for each represented
     * character whose decomposition type is at least {@code UCD.COMPATIBILITY}, compares
     * the collation elements of the character itself (fetched without decomposition)
     * against the remapped collation elements of its NFKD form. Every mismatch is
     * reported on standard output.
     *
     * @param collatorIn collator to validate; also stored in the shared {@code collator} field
     * @throws Exception propagated from the UCD/normalizer set-up or the collator
     */
    public static void validateUCA(UCA collatorIn) throws Exception {
        collator = collatorIn;
        ucd = UCD.make();
        nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
        nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());

        for (int codePoint = 0x0; codePoint <= 0x10FFFF; ++codePoint) {
            Utility.dot(codePoint);
            if (!ucd.isRepresented(codePoint)) {
                continue;
            }
            byte kind = ucd.getDecompositionType(codePoint);
            if (kind < UCD.COMPATIBILITY) {
                continue;
            }

            // The decomposed list is built first; both calls go through the shared ces buffer.
            String compatForm = nfkd.normalize(codePoint);
            CEList viaDecomposition = getCEList(codePoint, compatForm, true, kind);
            CEList direct = getCEList(UTF16.valueOf(codePoint), false);
            if (direct.equals(viaDecomposition)) {
                continue;
            }

            Utility.fixDot();
            System.out.println();
            System.out.println(ucd.getCodeAndName(codePoint));
            System.out.println(direct);
            System.out.println(viaDecomposition);
        }
    }
    /**
     * Entry point for the overlap report: loads every string/CE-list pair from the
     * collator's contents into the shared lookup tables via {@link #addString}, prints
     * summary counts, and then writes the overlap files via {@link #fullCheck()}.
     *
     * @param collatorIn collator whose contents are analysed; stored in the shared field
     * @throws Exception propagated from set-up, the collator, or file output
     */
    public static void test(UCA collatorIn) throws Exception {
        collator = collatorIn;
        CEList.main(null);
        System.out.println("# Overlap");
        System.out.println("# Generated " + Default.getDate());

        ucd = UCD.make();
        nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
        nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
        UCA.UCAContents contents = collator.getContents(UCA.FIXED_CE, nfd);

        // Cache everything up front so the later checks are plain map lookups.
        System.out.println("# Gathering Data");
        int progress = 0;
        int[] ceCount = new int[1];
        String entry;
        do {
            Utility.dot(progress++);
            entry = contents.next(ces, ceCount);
            if (entry != null) {
                addString(entry, new CEList(ces, 0, ceCount[0]));
            }
        } while (entry != null);

        Utility.fixDot();
        System.out.println("# Completes Count: " + completes.size());
        System.out.println("# Initials Count: " + initials.size());
        System.out.println("# Writing Overlaps");
        // simpleList() is the console-only alternative to the file report below.
        fullCheck();
    }
    /**
     * Registers a string and its collation-element list in the three shared tables:
     * {@code back} (string to list), {@code completes} (list to string), and
     * {@code initials}, where each {@code currCEList.start(n)} for
     * {@code 1 <= n < length} maps to the set of strings registered under it.
     *
     * @param s          the source string
     * @param currCEList the collation elements produced for {@code s}
     */
    public static void addString(String s, CEList currCEList) {
        back.put(s, currCEList);
        completes.put(currCEList, s);

        for (int prefixLen = 1; prefixLen < currCEList.length(); ++prefixLen) {
            CEList prefix = currCEList.start(prefixLen);
            Set owners = (Set) initials.get(prefix);
            if (owners == null) {
                // First string seen with this leading portion: create its bucket.
                owners = new TreeSet();
                initials.put(prefix, owners);
            }
            owners.add(s);
        }
    }
    /**
     * Console report: for every complete CE list that is also registered in
     * {@code initials} (i.e. it is a leading portion of some other string's CE list),
     * prints the owning string followed by each of the longer strings and their CE lists.
     * Finishes with the number of such complete lists found.
     */
    static void simpleList() {
        int progress = 0;
        int overlaps = 0;
        for (Iterator entries = completes.entrySet().iterator(); entries.hasNext();) {
            Utility.dot(progress++);
            Map.Entry entry = (Map.Entry) entries.next();
            CEList wholeKey = (CEList) entry.getKey();
            String source = (String) entry.getValue();

            // Is this complete CE list the start of something longer?
            Set longer = (Set) initials.get(wholeKey);
            if (longer == null) {
                continue;
            }

            Utility.fixDot();
            ++overlaps;
            System.out.println("Possible Overlap: ");
            System.out.println("  " + ucd.getCodeAndName(source));
            System.out.println("\t" + wholeKey);

            int ordinal = 0;
            for (Iterator candidates = longer.iterator(); candidates.hasNext(); ++ordinal) {
                String candidate = (String) candidates.next();
                CEList candidateCEs = (CEList) back.get(candidate);
                System.out.println(ordinal + ".  " + ucd.getCodeAndName(candidate));
                System.out.println("\t" + candidateCEs);
            }
        }
        System.out.println("# Found Count: " + overlaps);
    }
    // When true, matchWhole() prints a depth-indented trace of every attempt to System.out.
    static boolean PROGRESS = false;
    /**
     * Writes the overlap report for everything registered in {@code completes}.
     *
     * <p>Two files are produced in the collator's generation directory:
     * {@code Overlap.html}, a table with one row per overlapping string, and
     * {@code Overlap.txt}, a plain list of the overlapping strings. Only CE lists of
     * length two or more are examined; a string is reported when
     * {@link #matchWhole} succeeds for it. If the CE lists of the two assembled
     * strings turn out to differ, an extra WARNING row is emitted before the entry.
     *
     * <p>Each HTML row holds: the original string, the assembled goal string, the
     * assembled matching string, the code/name of each of those two, and the CE list
     * of the goal string.
     *
     * <p>Both writers are closed even when an exception interrupts the report, and the
     * table is terminated with a plain {@code </table>}: every row already closes its
     * own {@code <tr>}, so the former trailing {@code </tr>} was unbalanced markup.
     *
     * @throws IOException if either output file cannot be opened
     */
    static void fullCheck() throws IOException {
        PrintWriter log = Utility.openPrintWriter(collator.getUCA_GEN_DIR(), "Overlap.html", Utility.UTF8_WINDOWS);
        try {
            PrintWriter simpleList = Utility.openPrintWriter(collator.getUCA_GEN_DIR(), "Overlap.txt", Utility.UTF8_WINDOWS);
            try {
                Iterator it = completes.keySet().iterator();
                int counter = 0;
                int foundCount = 0;
                // One-element arrays act as in/out string accumulators for matchWhole().
                String [] goalChars = new String[1];
                String [] matchChars = new String[1];
                Utility.writeHtmlHeader(log, "Overlaps");
                log.print("<table>");
                while (it.hasNext()) {
                    Utility.dot(counter++);
                    CEList key = (CEList) it.next();
                    if (key.length() < 2) continue;
                    String val = (String) completes.get(key);
                    goalChars[0] = "";
                    matchChars[0] = "";
                    if (matchWhole(val, key, 0, goalChars, matchChars)) {
                        simpleList.println(ucd.getCodeAndName(val));
                        goalChars[0] = val + goalChars[0]; // fix first char
                        // Sanity check: the two assembled strings should collate identically.
                        if (!getCEList(goalChars[0]).equals(getCEList(matchChars[0]))) {
                            log.println("<tr><td colspan='6'>WARNING:" + getCEList(matchChars[0]) + "</td></tr>");
                        }
                        foundCount++;
                        log.println("<tr><td>" + val + "</td>");
                        log.println("<td>" + goalChars[0] + "</td>");
                        log.println("<td>" + matchChars[0] + "</td>");
                        log.println("<td>" + ucd.getCodeAndName(goalChars[0]) + "</td>");
                        log.println("<td>" + ucd.getCodeAndName(matchChars[0]) + "</td>");
                        log.println("<td>" + getCEList(goalChars[0]) + "</td></tr>");
                    }
                }
                log.println("</table>Number of Overlapping characters: " + foundCount + "</body>");
            } finally {
                simpleList.close();
            }
        } finally {
            log.close();
        }
    }
    /**
     * Convenience overload: returns the collation elements for {@code s} with the
     * {@code decomp} flag set to {@code true}.
     */
    static private CEList getCEList(String s) {
        return getCEList(s, true);
    }
    /**
     * Asks the shared collator for the collation elements of {@code s}, passing
     * {@code decomp} straight through, and wraps the filled portion of the shared
     * {@code ces} scratch buffer in a new {@code CEList}.
     */
    static private CEList getCEList(String s, boolean decomp) {
        // Arguments are evaluated left to right, so the buffer is filled before the constructor runs.
        return new CEList(ces, 0, collator.getCEs(s, decomp, ces));
    }
    /**
     * Fetches the collation elements of {@code s} into the shared {@code ces} buffer.
     * When {@code decomp} is true, each element is rebuilt with the same primary and
     * secondary weights but with its tertiary weight passed through
     * {@code CEList.remap(originalChar, type, tertiary)}.
     *
     * @param originalChar code point the string {@code s} was derived from
     * @param s            string to collate
     * @param decomp       forwarded to the collator; also enables the tertiary remapping
     * @param type         decomposition type handed to {@code CEList.remap}
     * @return a {@code CEList} over the filled portion of the buffer
     */
    static private CEList getCEList(int originalChar, String s, boolean decomp, byte type) {
        final int count = collator.getCEs(s, decomp, ces);
        if (!decomp) {
            return new CEList(ces, 0, count);
        }
        for (int idx = 0; idx < count; ++idx) {
            final int ce = ces[idx];
            ces[idx] = UCA.makeKey(UCA.getPrimary(ce),
                UCA.getSecondary(ce),
                CEList.remap(originalChar, type, UCA.getTertiary(ce)));
        }
        return new CEList(ces, 0, count);
    }
    /**
     * Recursively searches the shared tables for registered strings whose CE lists can
     * account for the whole of {@code goal}.
     *
     * <p>Three conditions are tried in order; the first that succeeds returns true.
     * Conditions 1 and 2 are skipped at {@code depth == 0}. Recursion deeper than 5
     * levels is abandoned and reported as a failure.
     *
     * <p>On every successful path the matched string is prepended to
     * {@code otherChars[0]}; this method never writes {@code goalChars[0]} directly.
     * Condition 2 recurses with the two arrays swapped, so at that nested level the
     * caller's {@code goalChars} is the array being extended.
     *
     * @param goalStr    string associated with {@code goal}; read only for the PROGRESS trace
     * @param goal       CE list still to be matched
     * @param depth      current recursion depth (0 for the top-level call)
     * @param goalChars  one-element in/out accumulator for the goal side
     * @param otherChars one-element in/out accumulator for the matching side
     * @return true if a complete match was found
     */
    static boolean matchWhole(String goalStr, CEList goal, int depth, String[] goalChars, String[] otherChars) {
        if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "Trying: " + ucd.getCodeAndName(goalStr) + ", " + goal);
        // to stop infinite loops, we limit the depth to 5
        if (depth > 5) {
            if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "stack exhausted");
            return false;
        }
        String match;
        // There are 3 possible conditions. Any of which work.
        // To eliminate double matches at the top level, we test depth > 0
        if (depth > 0) {
            // Condition 1.
            // we have an exact match
            match = (String) completes.get(goal);
            if (match != null) {
                if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "Matches Exactly: " + ucd.getCodeAndName(match));
                otherChars[0] = match + otherChars[0];
                if (PROGRESS) System.out.println(Utility.repeat(". ", depth)
                    + ucd.getCode(goalChars[0])
                    + " / " + ucd.getCode(otherChars[0])
                );
                return true;
            }
            // Condition 2
            // this whole string matches some initial portion of another string
            // AND the remainder of that other string also does a matchWhole.
            // Example: if we get the following, we search for a match to "de"
            // abc...
            // abcde
            // If we find a match, we append to the strings, the string for abc
            // and the one for abcde
            Set probe = (Set) initials.get(goal);
            if (probe != null) {
                Iterator it2 = probe.iterator();
                while (it2.hasNext()) {
                    match = (String) it2.next();
                    if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "Matches Longer: " + ucd.getCodeAndName(match)
                        + "\t\tswitching");
                    // What is left of the longer string's CE list once goal has been consumed.
                    CEList trail = ((CEList) back.get(match)).end(goal.length());
                    // Roles are swapped for the recursion: the longer string becomes the goal side.
                    boolean doesMatch = matchWhole(match, trail, depth+1, otherChars, goalChars);
                    if (doesMatch) {
                        otherChars[0] = match + otherChars[0];
                        if (PROGRESS) System.out.println(Utility.repeat(". ", depth)
                            + ucd.getCode(goalChars[0])
                            + " / " + ucd.getCode(otherChars[0])
                        );
                        return true;
                    }
                }
            }
        }
        // Condition 3
        // the first part of this string matches a whole other string
        // and the remainder of this string also does a matchWhole
        // Example: if we get the following, we search for a match to "de"
        // abcde..
        // abc..
        // if we find a match
        // Candidate split points are tried from the longest leading portion down to length 1.
        for (int i = goal.length() - 1; i > 0; --i) {
            CEList first = goal.start(i);
            match = (String) completes.get(first);
            if (match != null) {
                if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "Matches Shorter: " + ucd.getCodeAndName(match));
                // The remainder has no string of its own, hence the empty goalStr.
                boolean doesMatch = matchWhole("", goal.end(i), depth+1, goalChars, otherChars);
                if (doesMatch) {
                    otherChars[0] = match + otherChars[0];
                    if (PROGRESS) System.out.println(Utility.repeat(". ", depth)
                        + ucd.getCode(goalChars[0])
                        + " / " + ucd.getCode(otherChars[0])
                    );
                    return true;
                }
            }
        }
        // if we get this far, we failed.
        return false;
    }
    /**
     * Runs {@link #generateRevision(UCA, boolean)} with {@code doMax} set to true.
     * The {@code doMax == false} variant is currently disabled (commented out below).
     *
     * @param collatorIn collator whose contents are compared
     * @throws Exception propagated from the two-argument overload
     */
    public static void generateRevision (UCA collatorIn) throws Exception {
        //generateRevision(collatorIn, false);
        generateRevision(collatorIn, true);
    }
    /**
     * Compares, for every string in the collator's contents, the CE list the collator
     * stores ("old") with a CE list rebuilt code point by code point ("new"), and writes
     * the differences plus collision summaries to {@code UCA-old-vs-new.txt}
     * (or {@code UCA-old-vs-new-MAX.txt} when {@code doMax} is true) in the collator's
     * generation directory.
     *
     * <p>Rebuilding rule per code point: if the code point is not NFKD-normalized, the
     * CEs of its NFKD form are used; when that form also differs from the NFD form, each
     * CE's tertiary weight is replaced (0x1F for every CE after the first when
     * {@code doMax} is set, otherwise {@code CEList.remap(...)}), and any CE with a
     * non-zero primary and a secondary other than 0x20 is split into
     * {@code (primary, 0x20, t)} followed by {@code (0, secondary, t)}. Otherwise the
     * code point's own CEs are used.
     *
     * <p>A "collision" is recorded when two different strings yield the same CE list;
     * this is tracked separately for the old and the new lists and reported as
     * new-only, old-only and common sets via {@link #showCollisions}.
     *
     * <p>The decomposition type printed next to each differing string is looked up from
     * the string's first <em>code point</em> ({@code UTF16.charAt}), so supplementary
     * characters are no longer classified by their lead surrogate. The log writer is
     * closed even if reporting fails part-way.
     *
     * @param collatorIn collator whose contents are compared; stored in the shared field
     * @param doMax      selects the 0x1F tertiary override and the "-MAX" file name
     * @throws Exception propagated from set-up, the collator, or file output
     */
    public static void generateRevision (UCA collatorIn, boolean doMax) throws Exception {
        collator = collatorIn;
        CEList.main(null);
        System.out.println("# Generate");
        System.out.println("# Generated " + Default.getDate());
        ucd = UCD.make();
        nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
        nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        // store data for faster lookup
        System.out.println("# Gathering Data");
        int counter = 0;
        int[] lenArray = new int[1];
        // Pair(newList, Pair(str, oldList)) for every string whose lists differ.
        Set list = new TreeSet();
        // CE list -> first string seen with it.
        Map newCollisions = new HashMap();
        Map oldCollisions = new HashMap();
        // later string -> Pair(first string with the same CE list, that CE list).
        Map newProblems = new TreeMap();
        Map oldProblems = new TreeMap();
        // Stand-in used whenever a CE list comes out empty.
        CEList nullCEList = new CEList(new int[1]);
        while (true) {
            Utility.dot(counter++);
            String str = cc.next(ces, lenArray);
            if (str == null) break;
            int len = lenArray[0];
            CEList oldList = new CEList(ces, 0, len);
            CEList newList = new CEList(ces,0,0);
            int cp;
            for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp)) {
                cp = UTF16.charAt(str, i);
                if (0xFF3F == cp) {
                    System.out.println("debug");
                }
                if (!nfkd.isNormalized(cp)) {
                    String decomp = nfkd.normalize(cp);
                    String canon = nfd.normalize(cp);
                    len = collator.getCEs(decomp, true, ces);
                    if (!decomp.equals(canon)) {
                        byte type = ucd.getDecompositionType(cp);
                        for (int j = 0; j < len; ++j) {
                            // At the start of the string, a decomposition that begins with a space
                            // gets the fixed primary 0x20A instead of the CE's own primary.
                            int p = (i == 0 && decomp.length() > 1 && decomp.charAt(0) == ' ' ? 0x20A : UCA.getPrimary(ces[j]));
                            int s = UCA.getSecondary(ces[j]);
                            boolean needsFix = (s != 0x20 && p != 0);
                            if (needsFix) ++len;
                            int t = (doMax && j > 0 ? 0x1F : CEList.remap(cp, type, UCA.getTertiary(ces[j])));
                            if (needsFix) {
                                ces[j++] = UCA.makeKey(p, 0x20, t);             // Set Extra
                                System.arraycopy(ces, j, ces, j+1, len - j);    // Insert HOLE!
                                p = 0;
                            }
                            ces[j] = UCA.makeKey(p, s, t);
                        }
                    }
                } else {
                    len = collator.getCEs(UTF16.valueOf(cp), true, ces);
                }
                CEList inc = new CEList(ces, 0, len);
                if (cp == 0xFF71 || cp == 0xFF67) {
                    System.out.println("  String: " + ucd.getCodeAndName(cp));
                    System.out.println("  Type: " + ucd.getDecompositionTypeID(cp));
                    System.out.println("  xxx: " + inc);
                }
                newList = newList.append(inc);
            }
            if (newList.length() == 0) newList = nullCEList;
            if (oldList.length() == 0) oldList = nullCEList;
            if (!newList.equals(oldList)) {
                list.add(new Pair(newList, new Pair(str, oldList)));
            }
            // check for collisions
            if (str.equals("\u206F")) {
                System.out.println("debug");
            }
            Object probe = newCollisions.get(newList);
            if (probe == null) {
                newCollisions.put(newList, str);
            } else {
                newProblems.put(str, new Pair((String)probe, newList));
            }
            probe = oldCollisions.get(oldList);
            if (probe == null) {
                oldCollisions.put(oldList, str);
            } else {
                oldProblems.put(str, new Pair((String)probe, oldList));
            }
        }
        // Split the colliding strings into new-only, old-only and common sets.
        Set newKeys = new TreeSet(newProblems.keySet());
        Set oldKeys = new TreeSet(oldProblems.keySet());
        Set joint = new TreeSet(newKeys);
        joint.retainAll(oldKeys);
        newKeys.removeAll(joint);
        oldKeys.removeAll(joint);
        PrintWriter log = Utility.openPrintWriter(collator.getUCA_GEN_DIR(), "UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"), Utility.UTF8_WINDOWS);
        try {
            Iterator it = list.iterator();
            int last = -1;
            while (it.hasNext()) {
                Utility.dot(counter++);
                Pair value = (Pair) it.next();
                CEList newList = (CEList)value.first;
                // A blank line separates groups that start with a different primary weight.
                int cur = UCA.getPrimary(newList.at(0));
                if (cur != last) {
                    log.println();
                    last = cur;
                }
                Pair v2 = (Pair) value.second;
                String ss = (String)v2.first;
                // Use the first code point, not the first UTF-16 unit, so supplementary
                // characters report their real decomposition type.
                log.println(ucd.getCodeAndName(ss) + "\t\t" + ucd.getDecompositionTypeID(UTF16.charAt(ss, 0)));
                log.println("\tnew:\t" + value.first);
                log.println("\told:\t" + v2.second);
            }
            showCollisions(log, "New Collisions:", newKeys, newProblems);
            showCollisions(log, "Old Collisions:", oldKeys, oldProblems);
            showCollisions(log, "In Both:", joint, oldProblems);
        } finally {
            log.close();
        }
    }
    /**
     * Writes one section of the collision report.
     *
     * <p>Prints a blank line and {@code title} immediately followed by the size of
     * {@code bad}. It then lists, ordered by CE list, each colliding pair of strings with
     * the CE list they share. A blank line is inserted whenever the primary weight of the
     * first collation element changes. The writer is flushed but not closed.
     *
     * @param log   destination writer
     * @param title section heading (the count is appended with no separator)
     * @param bad   the strings (keys of {@code probs}) to report
     * @param probs string to {@code Pair(other string, shared CEList)}
     */
    static void showCollisions(PrintWriter log, String title, Set bad, Map probs) {
        log.println();
        log.println(title + bad.size());

        // Re-key by CE list so the output comes out in collation order.
        Set ordered = new TreeSet();
        for (Iterator keys = bad.iterator(); keys.hasNext();) {
            String key = (String) keys.next();
            Pair hit = (Pair) probs.get(key);
            String other = (String) hit.first;
            CEList shared = (CEList) hit.second;
            if (key.equals("\u0001")) {
                System.out.println("debug");
            }
            String names = ucd.getCodeAndName(key) + ",\t" + ucd.getCodeAndName(other);
            ordered.add(new Pair(shared, names));
        }

        int previousPrimary = -1;
        for (Iterator rows = ordered.iterator(); rows.hasNext();) {
            Pair row = (Pair) rows.next();
            CEList shared = (CEList) row.first;
            int primary = UCA.getPrimary(shared.at(0));
            if (primary != previousPrimary) {
                previousPrimary = primary;
                log.println();
            }
            log.println("Collision between: " + row.second);
            log.println("\t" + row.first);
        }
        log.flush();
    }
    /**
     * Gathers statistics on how UCA primary weights distribute over a hash table of
     * <code>tableLength</code> buckets (primary % tableLength), per script and overall,
     * and writes them as HTML to "checkstringsearchhash.html" in the collator's
     * generation directory. For Latin, the characters landing in each shared bucket
     * are listed in detail.
     * Side effects: replaces the static <code>collator</code>, <code>ucd</code>,
     * <code>nfd</code> and <code>nfkd</code> fields.
     * @param collatorIn collator whose collation elements are examined
     * @throws Exception propagated from the collator, UCD or file access
     */
    public static void checkHash(UCA collatorIn) throws Exception {
        collator = collatorIn;
        System.out.println("# Check Hash");
        System.out.println("# Generated " + Default.getDate());
        ucd = UCD.make();
        // Build the normalizers BEFORE asking for the collator contents, so that
        // getContents() receives the NFD normalizer for this UCD version rather than
        // whatever the static field held from an earlier call (possibly null).
        nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
        nfkd = new Normalizer(Normalizer.NFKD, collatorIn.getUCDVersion());
        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, nfd);
        int tableLength = 257;
        /*
         Other prime table sizes that can be tried:
         257 263 269 271 277 281 283 293 307 311 313 317
         331 337 347 349 353 359 367 373 379 383 389 397
         401 409 419 421 431 433 439 443 449 457 461 463
         467 479 487 491 499 503 509 521 523 541 547 557
         563 569 571 577 587 593 599 601 607 613 617 619
         631 641 643 647 653 659 661 673 677 683 691 701
         709 719 727 733 739 743 751 757 761 769 773 787
         797 809 811 821 823 827 829 839 853 857 859 863
         877 881 883 887 907 911 919 929 937 941 947 953
         967 971 977 983 991 997
        */
        // collisions[script][bucket] = number of distinct primaries hashing to that bucket;
        // repeats[script] remembers which primaries were already counted for the script.
        // The UNUSED_SCRIPT slot is reused as the "all scripts" accumulator.
        int [][] collisions = new int[LIMIT_SCRIPT][];
        BitSet[] repeats = new BitSet[LIMIT_SCRIPT];
        for (int i = 0; i < collisions.length; ++i) {
            collisions[i] = new int[tableLength];
            repeats[i] = new BitSet();
        }
        int counter = 0;
        int[] lenArray = new int[1];
        // Disabled alternative: walk the collator's own contents instead of all code points.
        if (false) while (true) {
            Utility.dot(counter++);
            String s = cc.next(ces, lenArray);
            if (s == null) break;
            if (UTF16.countCodePoint(s) != 1) continue; // skip ligatures
            int cp = UTF16.charAt(s, 0);
            if (!nfkd.isNormalized(cp)) continue;
            int script = ucd.getScript(cp);
            int len = lenArray[0];
            for (int i = 0; i < len; ++i) {
                int prim = UCA.getPrimary(ces[i]);
                int hash = prim % tableLength;
                if (!repeats[script].get(prim)) {
                    ++collisions[script][hash];
                    repeats[script].set(prim);
                } else {
                    System.out.println("Skipping: " + prim + " in " + ucd.getCodeAndName(cp));
                }
                if (!repeats[UNUSED_SCRIPT].get(prim)) {
                    ++collisions[UNUSED_SCRIPT][hash];
                    repeats[UNUSED_SCRIPT].set(prim);
                }
            }
        }
        // latin[bucket] accumulates the Latin characters whose primaries hash to that bucket.
        String [] latin = new String[tableLength];
        for (int i = 0; i < latin.length; ++i) {
            latin[i] = "";
        }
        // Scan the whole code point range, including the last code point U+10FFFF.
        for (int cp = 0; cp <= 0x10FFFF; ++cp) {
            Utility.dot(counter++);
            if (!ucd.isAllocated(cp)) continue;
            if (!nfkd.isNormalized(cp)) continue;
            if (ucd.getCategory(cp) == Lu) continue; // don't count case
            String scp = UTF16.valueOf(cp);
            int len = collator.getCEs(scp, true, ces);
            int script = ucd.getScript(cp);
            for (int i = 0; i < len; ++i) {
                int prim = UCA.getPrimary(ces[i]);
                int hash = prim % tableLength;
                if (!repeats[script].get(prim)) {
                    ++collisions[script][hash];
                    repeats[script].set(prim);
                    if (script == LATIN_SCRIPT) latin[hash] += scp;
                }
                if (!repeats[UNUSED_SCRIPT].get(prim)) {
                    ++collisions[UNUSED_SCRIPT][hash];
                    repeats[UNUSED_SCRIPT].set(prim);
                }
            }
        }
        System.out.println("Data Gathered");
        PrintWriter log = Utility.openPrintWriter(collator.getUCA_GEN_DIR(), "checkstringsearchhash.html", Utility.UTF8_WINDOWS);
        try {
            Utility.writeHtmlHeader(log, "Check Hash");
            log.println("<h1>Collisions</h1>");
            log.println("<p>Shows collisions among primary values when hashed to table size = " + tableLength + ".");
            log.println("Note: All duplicate primarys are removed: all non-colliding values are removed.</p>");
            log.println("<table><tr><th>Script</th><th>Sum</th><th>Average</th><th>Std Dev.</th></tr>");
            // NOTE(review): a byte index only works while LIMIT_SCRIPT <= 127 - confirm.
            for (byte i = 0; i < collisions.length; ++i) {
                if (i == UNUSED_SCRIPT) continue;
                showCollisions(log, ucd.getScriptID_fromIndex(i), collisions[i]);
            }
            showCollisions(log, "All", collisions[UNUSED_SCRIPT]);
            log.println("</table>");
            log.println("<p>Details of collisions for Latin</p>");
            for (int i = 0; i < latin.length; ++i) {
                // length() counts UTF-16 code units, not code points
                if (latin[i].length() < 2) continue;
                //if (UTF16.countCodePoint(latin[i]) < 2) continue;
                int cp2;
                log.println("<table>");
                for (int j = 0; j < latin[i].length(); j += UTF16.getCharCount(cp2)) {
                    cp2 = UTF16.charAt(latin[i], j);
                    String scp2 = UTF16.valueOf(cp2);
                    CEList clist = collator.getCEList(scp2, true);
                    log.println("<tr><td>" + scp2 + "</td><td>" + clist + "</td><td>" + ucd.getCodeAndName(cp2) + "</td></tr>");
                }
                log.println("</table><br>");
            }
        } finally {
            // always release the file, even when report generation fails part-way
            log.close();
        }
    }
    static java.text.NumberFormat nf = new java.text.DecimalFormat("#,##0.00");
    static java.text.NumberFormat nf0 = new java.text.DecimalFormat("#,##0");
    /**
     * Writes one HTML table row with the sum, average and standard deviation of the
     * non-zero bucket counts in <code>curr</code>. Zero buckets are ignored for all
     * three figures. When every bucket is zero the average and deviation are
     * reported as 0 instead of the NaN that 0/0 would produce.
     * @param log writer receiving the row
     * @param title text of the first cell
     * @param curr bucket counts
     */
    static void showCollisions(PrintWriter log, String title, int[] curr) {
        double sum = 0;
        int count = 0;
        for (int j = 0; j < curr.length; ++j) {
            if (curr[j] == 0) continue;
            sum += curr[j];
            ++count;
        }
        double average = 0;
        double sd = 0;
        if (count != 0) { // nothing to average otherwise; avoid 0/0
            average = sum / count;
            double squares = 0;
            for (int j = 0; j < curr.length; ++j) {
                if (curr[j] == 0) continue;
                double deviation = curr[j] - average;
                squares += deviation * deviation;
            }
            sd = Math.sqrt(squares / count);
        }
        log.println("<tr><td>" + title
            + "</td><td align='right'>" + nf0.format(sum)
            + "</td><td align='right'>" + nf.format(average)
            + "</td><td align='right'>" + nf.format(sd)
            + "</td></tr>");
    }
    /**
     * Writes "ListCyrillic.txt" into the collator's generation directory.
     * Every represented BMP character (U+0000..U+FFFE) whose script is Cyrillic is
     * NFD-decomposed and all characters of category Mn are removed. The stripped
     * strings are listed first (prefixed "# "), followed by the original
     * decompositions that contained marks (prefixed "### "), both ordered by
     * <code>collatorIn</code>. The leading "CYRILLIC " is dropped from the names.
     * Side effects: replaces the static <code>ucd</code> and <code>nfd</code> fields.
     * @param collatorIn collator used for ordering, UCD version and output directory
     * @throws IOException if the output file cannot be opened
     */
    public static void listCyrillic(UCA collatorIn) throws IOException {
        // Use the collator that was passed in: the static 'collator' field is only set
        // by other entry points and may still be null here.
        PrintWriter log = Utility.openPrintWriter(collatorIn.getUCA_GEN_DIR(), "ListCyrillic.txt", Utility.UTF8_WINDOWS);
        try {
            Set set = new TreeSet(collatorIn);
            Set set2 = new TreeSet(collatorIn);
            ucd = UCD.make();
            nfd = new Normalizer(Normalizer.NFD, collatorIn.getUCDVersion());
            for (char i = 0; i < 0xFFFF; ++i) {
                Utility.dot(i);
                if (!ucd.isRepresented(i)) continue;
                if (ucd.getScript(i) != CYRILLIC_SCRIPT) continue;
                String oldDecomp = nfd.normalize(String.valueOf(i));
                // Copy everything except nonspacing marks. Deleting in place while
                // stepping forward would skip the character after each deletion and
                // leave the second of two adjacent marks behind.
                StringBuffer stripped = new StringBuffer();
                for (int j = 0; j < oldDecomp.length(); ++j) {
                    char c = oldDecomp.charAt(j);
                    if (ucd.getCategory(c) != Mn) stripped.append(c);
                }
                String decomp = stripped.toString();
                if (decomp.length() == 0) continue;
                set.add(decomp);
                if (!decomp.equals(oldDecomp)) set2.add(oldDecomp);
            }
            Iterator it = set.iterator();
            while (it.hasNext()) {
                String s = (String) it.next();
                String name = ucd.getName(s.charAt(0));
                // Strings are immutable: the shortened name is the return value
                name = Utility.replace(name, "CYRILLIC ", "");
                log.println("# " + s + " <> XXX ; # " + name);
            }
            it = set2.iterator();
            while (it.hasNext()) {
                String s = (String) it.next();
                String name = ucd.getName(s.charAt(0));
                name = Utility.replace(name, "CYRILLIC ", "");
                log.println("### " + s + " <> XXX ; # " + name);
            }
        } finally {
            // release the file even if the UCD or normalizer fails part-way
            log.close();
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCA/HTML-Part1.txt
+++ b/tools/unicodetools/com/ibm/text/UCA/HTML-Part1.txt
@ -1,46 +0,0 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
 <meta name="VI60_defaultClientScript" content="JavaScript">
 <meta name="GENERATOR" content="Microsoft FrontPage 4.0">
 <meta name="keywords" content="Unicode Standard, technical reports">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <title>Technical Reports</title>
 <link rel="stylesheet" type="text/css"
 href="http://www.unicode.org/webscripts/standard_styles.css">
 <script language="Javascript" src="http://www.unicode.org/webscripts/commonHeader.js"></script>
 </head>
 <body text="#330000" topmargin="0" leftmargin="0" marginwidth="0"
 marginheight="0">
 <form action="http://www.unicode.org/webscripts/POST">
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
    <tr>
      <td colspan="2">
        <table width="100%" border="0" cellpadding="0" cellspacing="0">
          <tr>
            <td class="icon"><img border="0"
              src="http://www.unicode.org/webscripts/logo60s2.gif"
              align="middle" alt="[Unicode]" width="34" height="33">&nbsp;&nbsp;Charts</td>
            <td class="bar"><a href="http://www.unicode.org" class="bar">Home</a> 
              | <a href="http://www.unicode.org/sitemap/" class="bar">Site Map</a> 
              | <a href="http://www.unicode.org/search" class="bar">Search </a><script language="Javascript" src="http://www.unicode.org/webscripts/commonSearch.js"></script><noscript><a
              href="http://www.unicode.org/webscripts/quick_links.html"
              class="bar" target="_blank">Goto</a></noscript></td>
          </tr>
        </table>
      </td>
    </tr>
    <tr>
      <td colspan="2" class="gray">&nbsp;</td>
    </tr>
    <tr>
      <td>
        <h1>Collation Charts</h1>
      </td>
    </tr>
    <tr><td valign="top" class="navCol">
--- a/tools/unicodetools/com/ibm/text/UCA/HTML-Part2.txt
+++ b/tools/unicodetools/com/ibm/text/UCA/HTML-Part2.txt
@ -1,8 +0,0 @@
          <hr width="50%">
          <p align="center"><script language="Javascript" src="http://www.unicode.org/webscripts/lastModified.js"></script>              
        </blockquote>
      </td>
  </table>
 </form>
 </body>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCA/Implicit.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Implicit.java
@ -1,438 +0,0 @@
 package com.ibm.text.UCA;
 import com.ibm.text.UCD.UCD_Types;
 import com.ibm.text.utility.Utility;
 /**
 * For generation of Implicit CEs
 * @author Davis
 *
 * Cleaned up so that changes can be made more easily.
 * Old values:
 # First Implicit: E26A792D
 # Last Implicit: E3DC70C0
 # First CJK: E0030300
 # Last CJK: E0A9DD00
 # First CJK_A: E0A9DF00
 # Last CJK_A: E0DE3100
 */
 public class Implicit implements UCD_Types {
    /**
     * constants
     */
    // When true, the constructor and getSwappedImplicit print trace output to System.out.
    static final boolean DEBUG = false;
    // Mask selecting the first (most significant) byte of a 32-bit implicit CE.
    static final long topByte = 0xFF000000L;
    // Mask selecting the last (least significant) byte of a 32-bit implicit CE.
    static final long bottomByte = 0xFFL;
    // Mask used to view an int CE as an unsigned 32-bit value held in a long.
    static final long fourBytes = 0xFFFFFFFFL;
    // Largest raw value accepted by getImplicitFromRaw / returned by getRawFromImplicit.
    // NOTE(review): 0x220001 is 2 * 0x110000 + 1, which does not match the "+ 2" in the
    // trailing comment - confirm which was intended.
    static final int MAX_INPUT = 0x220001; // 2 * Unicode range + 2
    /**
     * Testing function
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("Start");
        try {
            final Implicit generator = new Implicit(0xE0, 0xE4);
            // spot check of one value; the result is not inspected
            generator.getRawFromImplicit(0xE20303E7);
            System.out.println("Gap4: " + generator.getGap4()); 
            long previous = 0;
            for (int raw = 0; raw <= MAX_INPUT; ++raw) {
                final long ce = generator.getImplicitFromRaw(raw) & fourBytes;
                // every raw value must survive the trip to a CE and back
                if (generator.getRawFromImplicit((int) ce) != raw) {
                    generator.throwError("No roundtrip", raw); 
                }
                // every value strictly between two consecutive CEs must be rejected (-1)
                if (previous != 0) {
                    for (long between = previous + 1; between < ce; ++between) {
                        if (generator.getRawFromImplicit((int) between) != -1) {
                            generator.throwError("Fails to recognize illegal", between);
                        }
                    }
                }
                // (the explicit gap3/gap4 spacing checks that used to follow are disabled)
                // print a window of values around each lead-byte change and two fixed points
                final boolean leadByteChanged = (previous & topByte) != (ce & topByte);
                if (leadByteChanged || raw == 0x10000 || raw == 0x110000) {
                    for (int back = 3; back >= 1; --back) {
                        generator.show(raw - back);
                    }
                    if (raw != 0) {
                        final boolean entersFourByte = (previous & bottomByte) == 0 && (ce & bottomByte) != 0;
                        if (entersFourByte) {
                            System.out.println("+ primary boundary, 4-byte CE's below");
                        } else if (leadByteChanged) {
                            System.out.println("+ primary boundary");
                        }
                    }
                    for (int ahead = 0; ahead <= 2; ++ahead) {
                        generator.show(raw + ahead);
                    }
                    System.out.println("...");
                }
                previous = ce;
            }
            // finish with the last three raw values
            for (int back = 2; back >= 0; --back) {
                generator.show(MAX_INPUT - back);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            System.out.println("End");
        }
    }
    /**
     * Fails with a message made of the title, the raw value in hex and the
     * implicit CE generated for that raw value, separated by tabs.
     */
    private void throwError(String title, int cp) {
        final String rawHex = Utility.hex(cp);
        final String implicitHex = Utility.hex(getImplicitFromRaw(cp) & fourBytes);
        throw new IllegalArgumentException(title + "\t" + rawHex + "\t" + implicitHex);
    }
    /**
     * Fails with a message made of the title and the low 32 bits of the
     * given CE value in hex, separated by a tab.
     */
    private void throwError(String title, long ce) {
        final String ceHex = Utility.hex(ce & fourBytes);
        throw new IllegalArgumentException(title + "\t" + ceHex);
    }
    /**
     * Prints the raw value and its implicit CE (both in hex, tab separated);
     * raw values outside 0..MAX_INPUT are silently ignored.
     */
    private void show(int i) {
        if (i < 0 || i > MAX_INPUT) {
            return;
        }
        final String rawHex = Utility.hex(i);
        final String implicitHex = Utility.hex(getImplicitFromRaw(i) & fourBytes);
        System.out.println(rawHex + "\t" + implicitHex);
    }
    /**
     * Precomputed by constructor
     */
    // gap3 + 1: distance between consecutive final-byte values in the 3-byte form
    int final3Multiplier;
    // gap4 + 1: distance between consecutive final-byte values in the 4-byte form
    int final4Multiplier;
    // number of final-byte values used in the 3-byte form
    int final3Count;
    // number of final-byte values used in the 4-byte form
    int final4Count;
    // number of values a medial byte can take (maxTrail - minTrail + 1)
    int medialCount;
    // first lead byte of the 3-byte form (the constructor's minPrimary)
    int min3Primary;
    // first lead byte of the 4-byte form (minPrimary + primaries3count)
    int min4Primary;
    // last lead byte of the 4-byte form (the constructor's maxPrimary)
    int max4Primary;
    // smallest and largest values allowed in any non-lead byte
    int minTrail;
    int maxTrail;
    // largest final-byte value actually produced in the 3-byte / 4-byte form
    int max3Trail;
    int max4Trail;
    // first raw value that is encoded with the 4-byte form
    int min4Boundary;
    /**
     * @return the number of unused final-byte values left between consecutive
     * 4-byte implicits (final4Multiplier - 1)
     */
    public int getGap4() {
        final int gap = this.final4Multiplier - 1;
        return gap;
    }
    /**
     * @return the number of unused final-byte values left between consecutive
     * 3-byte implicits (final3Multiplier - 1)
     */
    public int getGap3() {
        final int gap = this.final3Multiplier - 1;
        return gap;
    }
    // old comment
    // we must skip all 00, 01, 02, FF bytes, so most bytes have 252 values
    // we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
    // we shift so that HAN all has the same first primary, for compression.
    // for the 4 byte case, we make the gap as large as we can fit.
    /**
     * Supply parameters for generating implicit CEs
     */
    public Implicit(int minPrimary, int maxPrimary) {
        // Delegates with the default layout: trail bytes 0x04..0xFE, a gap of 1 between
        // 3-byte finals, and a single lead byte reserved for the 3-byte form.
        // The 4-byte gap is not passed in; the six-argument constructor derives it.
        // NOTE(review): the remark below about 13 presumably describes that derived
        // 4-byte gap for these defaults - confirm.
        // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
        this(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1);
    }
    /**
     * Set up to generate implicits.
     * @param minPrimary
     * @param maxPrimary
     * @param minTrail final byte
     * @param maxTrail final byte
     * @param gap3 the gap we leave for tailoring for 3-byte forms
     * @param primaries3count number of 3-byte primarys we can use (normally 1)
     */
    public Implicit(int minPrimary, int maxPrimary, int minTrail, int maxTrail, int gap3, int primaries3count) {
        if (DEBUG) {
            System.out.println("minPrimary: " + Utility.hex(minPrimary));
            System.out.println("maxPrimary: " + Utility.hex(maxPrimary));
            System.out.println("minTrail: " + Utility.hex(minTrail));
            System.out.println("maxTrail: " + Utility.hex(maxTrail));
            System.out.println("gap3: " + Utility.hex(gap3));
            System.out.println("primaries3count: " + primaries3count);
        }
        // some simple parameter checks
        if (minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) throw new IllegalArgumentException("bad lead bytes");
        if (minTrail < 0 || minTrail >= maxTrail || maxTrail > 0xFF) throw new IllegalArgumentException("bad trail bytes");
        if (primaries3count < 1) throw new IllegalArgumentException("bad three-byte primaries");
        // At least one lead byte must remain for the 4-byte form: the number of
        // 4-byte lead bytes is used as a divisor below.
        if (primaries3count > maxPrimary - minPrimary) throw new IllegalArgumentException("bad three-byte primaries: none left for four-byte form");
        // gap3 + 1 is used as a divisor below, so a negative gap cannot be accepted.
        if (gap3 < 0) throw new IllegalArgumentException("bad gap3");
        this.minTrail = minTrail;
        this.maxTrail = maxTrail;
        min3Primary = minPrimary;
        max4Primary = maxPrimary;
        // compute constants for use later.
        // number of values we can use in trailing bytes
        // leave room for empty values between AND above, e.g. if gap = 2
        // range 3..7 => +3 -4 -5 -6 -7: so 1 value
        // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values
        // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values
        final3Multiplier = gap3 + 1;
        final3Count = (maxTrail - minTrail + 1) / final3Multiplier;
        max3Trail = minTrail + (final3Count - 1) * final3Multiplier;
        // medials can use full range
        medialCount = (maxTrail - minTrail + 1);
        // find out how many values fit in each form
        int threeByteCount = medialCount * final3Count;
        // now determine where the 3/4 boundary is.
        // we use 3 bytes below the boundary, and 4 above
        int primariesAvailable = maxPrimary - minPrimary + 1;
        int primaries4count = primariesAvailable - primaries3count;
        int min3ByteCoverage = primaries3count * threeByteCount;
        min4Primary = minPrimary + primaries3count;
        min4Boundary = min3ByteCoverage;
        // Now expand out the multiplier for the 4 bytes, and redo.
        int totalNeeded = MAX_INPUT - min4Boundary;
        int neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count);
        if (DEBUG) System.out.println("neededPerPrimaryByte: " + neededPerPrimaryByte);
        int neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
        if (DEBUG) System.out.println("neededPerFinalByte: " + neededPerFinalByte);
        // spread the needed final-byte values over the trail range as widely as possible
        int gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;
        if (DEBUG) System.out.println("expandedGap: " + gap4);
        if (gap4 < 1) throw new IllegalArgumentException("must have larger gap4s");
        final4Multiplier = gap4 + 1;
        final4Count = neededPerFinalByte;
        max4Trail = minTrail + (final4Count - 1) * final4Multiplier;
        // sanity check: the 4-byte form alone must be able to cover the whole input range
        if (primaries4count * medialCount * medialCount * final4Count < MAX_INPUT) {
            throw new IllegalArgumentException("internal error");
        }
        if (DEBUG) {
            System.out.println("final4Count: " + final4Count);
            // NOTE(review): this listing starts at minTrail + final4Multiplier, whereas
            // getImplicitFromRaw starts at minTrail - confirm which is intended.
            for (int counter = 0; counter < final4Count; ++counter) {
                int value = minTrail + (1 + counter)*final4Multiplier;
                System.out.println(counter + "\t" + value + "\t" + Utility.hex(value));
            }
        }
    }
    /**
     * Divides a by b, rounding the quotient up, for positive a and b.
     * Computed as 1 + (a - 1) / b, so for a == 0 (and positive b) the result is 1, not 0.
     * @param a dividend
     * @param b divisor
     * @return the rounded-up quotient
     */
    static public int divideAndRoundUp(int a, int b) {
        final int wholeBelow = (a - 1) / b;
        return wholeBelow + 1;
    }
    /**
     * Converts implicit CE into raw integer
     * @param implicit
     * @return -1 if illegal format
     */
    public int getRawFromImplicit(int implicit) {
        // split into bytes, most significant first
        final int lead = (implicit >>> 24) & 0xFF;
        final int second = (implicit >>> 16) & 0xFF;
        final int third = (implicit >>> 8) & 0xFF;
        final int fourth = implicit & 0xFF;
        // lead byte and second byte have the same limits in both forms
        final boolean leadInRange = lead >= min3Primary && lead <= max4Primary;
        final boolean secondInRange = second >= minTrail && second <= maxTrail;
        if (!leadInRange || !secondInRange) {
            return -1;
        }
        final int secondIndex = second - minTrail;
        final int raw;
        if (lead < min4Primary) {
            // 3-byte form: third byte is the spread-out final, fourth byte must be empty
            if (third < minTrail || third > max3Trail || fourth != 0) {
                return -1;
            }
            final int finalOffset = third - minTrail;
            if (finalOffset % final3Multiplier != 0) {
                return -1; // falls into a gap
            }
            final int leadIndex = lead - min3Primary;
            final int finalIndex = finalOffset / final3Multiplier;
            raw = ((leadIndex * medialCount) + secondIndex) * final3Count + finalIndex;
        } else {
            // 4-byte form: third byte is a medial, fourth byte is the spread-out final
            if (third < minTrail || third > maxTrail
                    || fourth < minTrail || fourth > max4Trail) {
                return -1;
            }
            final int thirdIndex = third - minTrail;
            final int finalOffset = fourth - minTrail;
            if (finalOffset % final4Multiplier != 0) {
                return -1; // falls into a gap
            }
            final int finalIndex = finalOffset / final4Multiplier;
            final int leadIndex = lead - min4Primary;
            raw = (((leadIndex * medialCount) + secondIndex) * medialCount + thirdIndex) * final4Count + finalIndex + min4Boundary;
        }
        // final check
        return (raw < 0 || raw > MAX_INPUT) ? -1 : raw;
    }
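    /**
     * Generate the implicit CE, from raw integer.
     * Left shifted to put the first byte at the top of an int.
     * @param cp raw value in the range 0..MAX_INPUT (not a code point; see getSwappedImplicit)
     * @return the implicit CE: 3-byte form (last byte 00) for raw values below the 3/4 boundary, otherwise 4-byte form
     */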
    public int getImplicitFromRaw(int cp) {
        if (cp < 0 || cp > MAX_INPUT) {
            throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
        }
        final int fourByteIndex = cp - min4Boundary;
        if (fourByteIndex < 0) {
            // 3-byte form: peel off the final index, then the medial, leaving the lead
            final int finalIndex = cp % final3Count;
            final int upper = cp / final3Count;
            final int medialIndex = upper % medialCount;
            final int leadIndex = upper / medialCount;
            final int finalByte = minTrail + finalIndex * final3Multiplier; // spread out by the gap
            final int medialByte = minTrail + medialIndex;
            final int leadByte = min3Primary + leadIndex;
            if (leadByte >= min4Primary) {
                throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(leadByte));
            }
            return (leadByte << 24) + (medialByte << 16) + (finalByte << 8);
        }
        // 4-byte form: same peeling with two medial bytes
        final int finalIndex = fourByteIndex % final4Count;
        final int upper = fourByteIndex / final4Count;
        final int lowMedialIndex = upper % medialCount;
        final int upper2 = upper / medialCount;
        final int highMedialIndex = upper2 % medialCount;
        final int leadIndex = upper2 / medialCount;
        final int finalByte = minTrail + finalIndex * final4Multiplier; // spread out by the gap
        final int lowMedialByte = minTrail + lowMedialIndex;
        final int highMedialByte = minTrail + highMedialIndex;
        final int leadByte = min4Primary + leadIndex;
        if (leadByte > max4Primary) {
            throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(leadByte));
        }
        return (leadByte << 24) + (highMedialByte << 16) + (lowMedialByte << 8) + finalByte;
    }
    /**
     * Gets an Implicit from a code point. Internally,
     * swaps the CJK ranges and adds 1 (which produces a raw value of at most 220000),
     * then converts raw to implicit.
     * @param cp code point
     * @return the implicit CE for the code point
     */
    public int getSwappedImplicit(int cp) {
        if (DEBUG) {
            System.out.println("Incoming: " + Utility.hex(cp));
        }
        // Raw value = reordered code point + 1; the +1 keeps raw value 0 unassigned.
        final int raw = Implicit.swapCJK(cp) + 1;
        if (DEBUG) {
            System.out.println("CJK swapped: " + Utility.hex(raw));
        }
        return getImplicitFromRaw(raw);
    }
    /**
        * Function used to: 
        * a) collapse the 2 different Han ranges from UCA into one (in the right order), and
        * b) bump any non-CJK characters by 110000 (NON_CJK_OFFSET).
        * The relevant blocks are:
        * A:    4E00..9FFF; CJK Unified Ideographs
        *       F900..FAFF; CJK Compatibility Ideographs
        * B:    3400..4DBF; CJK Unified Ideographs Extension A
        *       20000..XX;  CJK Unified Ideographs Extension B (and others later on)
        * As long as
        *   no new B characters are allocated between 4E00 and FAFF, and
        *   no new A characters are outside of this range,
        * (very high probability) this simple code will work.
        * The reordered blocks are:
        * Block1 is CJK
        * Block2 is CJK_COMPAT_USED
        * Block3 is CJK_A
        * (all contiguous)
        * Any other CJK gets its normal code point
        * Any non-CJK gets +110000 (NON_CJK_OFFSET)
        * When we reorder Block1, we make sure that it is at the very start,
        * so that it will use a 3-byte form.
        * Warning: we only pick up the compatibility characters that are
        * NOT decomposed, so that block is smaller!
        */
    static int NON_CJK_OFFSET = 0x110000;
    static int swapCJK(int i) {
        final int cjkCount = CJK_LIMIT - CJK_BASE;
        final int compatCount = CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE;
        final int swapped;
        if (i < CJK_BASE) {
            // below the main CJK block: only Extension A is reordered
            if (i < CJK_A_BASE) {
                swapped = i + NON_CJK_OFFSET;
            } else if (i < CJK_A_LIMIT) {
                swapped = i - CJK_A_BASE + cjkCount + compatCount; // third reordered block
            } else {
                swapped = i + NON_CJK_OFFSET; // non-CJK
            }
        } else if (i < CJK_LIMIT) {
            swapped = i - CJK_BASE; // first reordered block
        } else if (i < CJK_COMPAT_USED_BASE) {
            swapped = i + NON_CJK_OFFSET;
        } else if (i < CJK_COMPAT_USED_LIMIT) {
            swapped = i - CJK_COMPAT_USED_BASE + cjkCount; // second reordered block
        } else if (i < CJK_B_BASE) {
            swapped = i + NON_CJK_OFFSET;
        } else if (i < CJK_B_LIMIT) {
            swapped = i; // non-BMP-CJK keeps its code point
        } else {
            swapped = i + NON_CJK_OFFSET; // non-CJK
        }
        return swapped;
    }
    /**
     * @return the smallest value used for trailing (non-lead) bytes
     */
    public int getMinTrail() {
        return this.minTrail;
    }
    /**
     * @return the largest value allowed for trailing (non-lead) bytes
     */
    public int getMaxTrail() {
        return this.maxTrail;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCA/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Main.java
@ -1,175 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ 
 * $Date: 2005/04/06 15:15:43 $ 
 * $Revision: 1.20 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCA;
 import java.io.File;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.CanonicalIterator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.text.UCD.*;
 import com.ibm.text.utility.*;
 /**
  * Command-line entry point for the UCA tools. Every argument names an action
  * that is dispatched to GenOverlap, WriteCharts or WriteCollationData.
  */
 public class Main {
    //static final String UCDVersion = "4.0.0";

    /**
     * Options substituted for the "ICU" pseudo-option. "short" and "noCE"
     * toggle flags in main(), so the repeated writer names run again under
     * different flag settings.
     */
    static final String[] ICU_FILES = {"writeCollationValidityLog", "writeFractionalUCA",
        "WriteRules", "WriteRulesXML", "writeconformance", "writeconformanceshifted",
        "short",
        "WriteRules", "WriteRulesXML", "writeconformance", "writeconformanceshifted",
        "noCE", "short",
        "WriteRules",
        "collationChart"
    };

    /**
     * Processes the options in order.
     * <ul>
     * <li>An option starting with '#' ends processing.</li>
     * <li>"ICU" replaces the argument list with ICU_FILES followed by the
     *     remaining arguments and restarts from the beginning.</li>
     * <li>"version" consumes the following argument as the UCD version.</li>
     * <li>Any other option is matched case-insensitively against the known
     *     actions; unknown options print a help text.</li>
     * </ul>
     * With no arguments at all, "?" is used so that the help text is printed.
     *
     * @param args the option names
     * @throws IllegalArgumentException if "version" is the last option
     * @throws Exception whatever the invoked action throws
     */
    public static void main(String args[]) throws Exception {
        // NOTE: so far, we don't need to build the UCA with anything but the latest versions.
        // A few changes would need to be made to the code to do older versions.
        try {
            if (args.length == 0) args = new String[] {"?"}; // force the help comment
            boolean shortPrint = false;
            boolean noCE = false;
            for (int i = 0; i < args.length; ++i) {
                String arg = args[i];
                System.out.println("OPTION: " + arg);
                // Guard the charAt: an empty argument has no first character and
                // simply falls through to the unknown-option help below.
                if (arg.length() > 0 && arg.charAt(0) == '#') return; // skip rest of line
                if (arg.equalsIgnoreCase("ICU")) {
                    args = Utility.append(ICU_FILES, Utility.subarray(args, i+1));
                    i = -1; // restart with the substituted argument list
                    continue;
                }
                if (arg.equalsIgnoreCase("version")) {
                    // Fail with a clear message instead of an array-index error.
                    if (i + 1 >= args.length) {
                        throw new IllegalArgumentException(
                            "Option \"version\" must be followed by a UCD version");
                    }
                    Default.setUCD(args[++i]); // get next arg
                    continue;
                }
                buildCollatorIfNeeded();
                if (arg.equalsIgnoreCase("GenOverlap")) GenOverlap.test(WriteCollationData.collator);
                else if (arg.equalsIgnoreCase("validateUCA")) GenOverlap.validateUCA(WriteCollationData.collator);
                //else if (arg.equalsIgnoreCase("writeNonspacingDifference")) WriteCollationData.writeNonspacingDifference();
                else if (arg.equalsIgnoreCase("collationChart")) WriteCharts.collationChart(WriteCollationData.collator);
                else if (arg.equalsIgnoreCase("scriptChart")) WriteCharts.scriptChart();
                else if (arg.equalsIgnoreCase("normalizationChart")) WriteCharts.normalizationChart();
                else if (arg.equalsIgnoreCase("caseChart")) WriteCharts.caseChart();
                else if (arg.equalsIgnoreCase("indexChart")) WriteCharts.indexChart();
                else if (arg.equalsIgnoreCase("special")) WriteCharts.special();
                else if (arg.equalsIgnoreCase("writeCompositionChart")) WriteCharts.writeCompositionChart();
                else if (arg.equalsIgnoreCase("CheckHash")) GenOverlap.checkHash(WriteCollationData.collator);
                else if (arg.equalsIgnoreCase("generateRevision")) GenOverlap.generateRevision(WriteCollationData.collator);
                else if (arg.equalsIgnoreCase("listCyrillic")) GenOverlap.listCyrillic(WriteCollationData.collator);
                else if (arg.equalsIgnoreCase("WriteRules")) WriteCollationData.writeRules(WriteCollationData.WITHOUT_NAMES, shortPrint, noCE);
                // else if (arg.equalsIgnoreCase("WriteRulesWithNames")) WriteCollationData.writeRules(WriteCollationData.WITH_NAMES);
                else if (arg.equalsIgnoreCase("WriteRulesXML")) WriteCollationData.writeRules(WriteCollationData.IN_XML, shortPrint, noCE);
                else if (arg.equalsIgnoreCase("checkDisjointIgnorables")) WriteCollationData.checkDisjointIgnorables();
                else if (arg.equalsIgnoreCase("writeContractions")) WriteCollationData.writeContractions();
                else if (arg.equalsIgnoreCase("writeFractionalUCA")) WriteCollationData.writeFractionalUCA("FractionalUCA");
                else if (arg.equalsIgnoreCase("writeConformance")) WriteCollationData.writeConformance("CollationTest_NON_IGNORABLE", UCA.NON_IGNORABLE, shortPrint);
                else if (arg.equalsIgnoreCase("writeConformanceSHIFTED")) WriteCollationData.writeConformance("CollationTest_SHIFTED", UCA.SHIFTED, shortPrint);
                else if (arg.equalsIgnoreCase("testCompatibilityCharacters")) WriteCollationData.testCompatibilityCharacters();
                else if (arg.equalsIgnoreCase("writeCollationValidityLog")) WriteCollationData.writeCollationValidityLog();
                else if (arg.equalsIgnoreCase("writeCaseExceptions")) WriteCollationData.writeCaseExceptions();
                else if (arg.equalsIgnoreCase("writeJavascriptInfo")) WriteCollationData.writeJavascriptInfo();
                else if (arg.equalsIgnoreCase("writeCaseFolding")) WriteCollationData.writeCaseFolding();
                else if (arg.equalsIgnoreCase("javatest")) WriteCollationData.javatest();
                else if (arg.equalsIgnoreCase("short")) shortPrint = !shortPrint;
                else if (arg.equalsIgnoreCase("noCE")) noCE = !noCE;
                else if (arg.equalsIgnoreCase("checkCanonicalIterator")) checkCanonicalIterator();
                else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
                // else if (arg.equalsIgnoreCase("probe")) Probe.test();
                else {
                    // NOTE(review): this list is out of date. It names WriteRulesWithNames
                    // and hex, which have no active branch above, and omits many options
                    // that do.
                    System.out.println();
                    System.out.println("UNKNOWN OPTION (" + arg + "): must be one of the following (case-insensitive)");
                    System.out.println("\tWriteRulesXML, WriteRulesWithNames, WriteRules,");
                    System.out.println("\tcheckDisjointIgnorables, writeContractions,");
                    System.out.println("\twriteFractionalUCA, writeConformance, writeConformanceSHIFTED, testCompatibilityCharacters,");
                    System.out.println("\twriteCollationValidityLog, writeCaseExceptions, writeJavascriptInfo, writeCaseFolding");
                    System.out.println("\tjavatest, hex (used for conformance)");
                }
            }
        } finally {
            System.out.println("Done");
        }
    }

    /**
     * Builds the shared collator and UCD data on first use; does nothing when
     * WriteCollationData.collator is already set.
     */
    private static void buildCollatorIfNeeded() throws Exception {
        if (WriteCollationData.collator != null) return;
        System.out.println("Building UCA");
        String file = Utility.searchDirectory(new File(UCD_Types.BASE_DIR + "UCA\\" + Default.ucdVersion() + "\\"), "allkeys", true, ".txt");
        WriteCollationData.collator = new UCA(file, Default.ucdVersion());
        System.out.println("Built version " + WriteCollationData.collator.getDataVersion()
            + "/ucd: " + WriteCollationData.collator.getUCDVersion());
        System.out.println("Building UCD data");
        WriteCollationData.ucd = UCD.make(WriteCollationData.collator.getUCDVersion());
    }

    /**
     * Prints the implicit primary for CJK_BASE, lists the canonical
     * equivalents of two sample strings, and then feeds every code point that
     * is not unassigned, private use or surrogate to CanonicalIterator,
     * reporting any RuntimeException it raises.
     */
    private static void checkCanonicalIterator() {
        int firstImplicit = WriteCollationData.getImplicitPrimary(UCD_Types.CJK_BASE);
        System.out.println("UCD_Types.CJK_BASE: " + Utility.hex(UCD_Types.CJK_BASE));
        System.out.println("first implicit: " + Utility.hex((long)(firstImplicit & 0xFFFFFFFFL)));
        CanonicalIterator it = new CanonicalIterator("");
        String[] tests = new String[] {"\uF900", "\u00C5d\u0307\u0327"};
        for (int j = 0; j < tests.length; ++j) {
            System.out.println(Default.ucd().getCodeAndName(tests[j]));
            it.setSource(tests[j]);
            String ss;
            for (int i = 0; (ss = it.next()) != null; ++i) {
                System.out.println(i + "\t" + Default.ucd().getCodeAndName(ss));
            }
        }
        // verify that nothing breaks; the bound is inclusive so that the last
        // code point (10FFFF) is part of the scan as well
        for (int i = 0; i <= 0x10FFFF; ++i) {
            int cat = UCharacter.getType(i);
            if (cat == UCharacter.UNASSIGNED || cat == UCharacter.PRIVATE_USE || cat == UCharacter.SURROGATE) continue;
            String s = UTF16.valueOf(i);
            try {
                it.setSource(s);
            } catch (RuntimeException e) {
                System.out.println("Failure with U+" + Utility.hex(i));
                e.printStackTrace();
            }
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCA/RuleComparator.java
+++ b/tools/unicodetools/com/ibm/text/UCA/RuleComparator.java
@ -1,67 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/RuleComparator.java,v $ 
 * $Date: 2001/08/31 00:20:40 $ 
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCA;
 import com.ibm.text.UCD.*;
 import com.ibm.text.utility.*;
 /**
  * Orders strings by their UTF-16 code units, left to right. A string that is
  * a proper prefix of another sorts before it.
  * <p>
  * An earlier level-aware comparison (skipping to the next zero separator)
  * was disabled in this class; what remains is a plain code-unit comparison
  * whose result is normalized to -1, 0 or 1.
  */
 public final class RuleComparator implements java.util.Comparator {
    /**
     * Compares two strings code unit by code unit.
     *
     * @param s the first value; must be a String
     * @param t the second value; must be a String
     * @return -1 if s sorts first, 1 if t sorts first, 0 if they are equal
     * @throws ClassCastException if either argument is not a String
     * @throws NullPointerException if either argument is null
     */
    public int compare(Object s, Object t) {
        String ss = (String)s;
        String tt = (String)t;
        // The first differing code unit decides the order.
        for (int i = 0; i < ss.length() && i < tt.length(); ++i) {
            char cs = ss.charAt(i);
            char ct = tt.charAt(i);
            if (cs == ct) continue;
            return cs < ct ? -1 : 1;
        }
        // One string is a prefix of the other: the shorter one sorts first.
        if (ss.length() > tt.length()) return 1;
        if (ss.length() < tt.length()) return -1;
        return 0;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
--- a/tools/unicodetools/com/ibm/text/UCA/UCA_Data.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA_Data.java
@ -1,336 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Data.java,v $ 
 * $Date: 2006/06/08 18:16:40 $ 
 * $Revision: 1.4 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCA;
 import java.util.*;
 import java.io.BufferedReader;
 import java.io.Reader;
 import java.io.PrintWriter;
 import java.io.FileReader;
 import java.text.MessageFormat;
 import java.io.IOException;
 import com.ibm.text.UCD.Normalizer;
 import com.ibm.text.UCD.UCD;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 public class UCA_Data implements UCA_Types {
    static final boolean DEBUG = false;
    static final boolean DEBUG_SHOW_ADD = false;
    static final boolean lessThan410 = false;
    private Normalizer toD;
    private UCD ucd;
    /**
     * Stores the helpers used by the lookups in this class. The tables
     * themselves are set up by the field initializers and the instance
     * initializer block, which run before this body.
     *
     * @param toD normalizer queried for canonical combining classes
     * @param ucd character database queried for Hangul/CJK tests and for
     *            names in debug output
     */
    public UCA_Data(Normalizer toD, UCD ucd) {
        this.toD = toD;
        this.ucd = ucd;
    }
    /**
     * The collation element data is stored in a couple of different structures.
     * First is collationElements, which generally contains the 32-bit CE corresponding
     * to the data. It is directly indexed by character code.<br>
     * For brevity in the implementation, we just use a flat array.
     * A real implementation would use a multi-stage table, as described in TUS Section 5.
     * table of simple collation elements, indexed by char.<br>
     * Exceptional cases: expanding, contracting, unsupported are handled as described below.
     */
    private int[] collationElements = new int[65536];
    /**
     * Although a single character can expand into multiple CEs, we don't want to burden
     * the normal case with the storage. So, they get a special value in the collationElements
     * array. This value has a distinct primary weight, followed by an index into a separate
     * table called expandingTable. All of the CEs in that table, up to a TERMINATOR value
     * will be used for the expansion. The implementation is as a stack; this just makes it
     * easy to generate.
     */
    private IntStack expandingTable = new IntStack(3600); // initial number is from compKeys
    /**
     * For now, this is just a simple mapping of strings to collation elements.
     * The implementation depends on the contracting characters being "completed",
     * so that it can be efficiently determined when to stop looking.
     */
    private Map contractingTable = new TreeMap();
    {
        // Every code unit starts out as unsupported.
        java.util.Arrays.fill(collationElements, UNSUPPORTED_FLAG);

        // Each lead surrogate (D800..DBFF) is pre-registered as the start of a
        // contraction whose single-unit entry is itself unsupported.
        for (int lead = 0xD800; lead < 0xDC00; ++lead) {
            collationElements[lead] = CONTRACTING;
            addToContractingTable(String.valueOf((char) lead), UNSUPPORTED_FLAG);
        }

        checkConsistency();
    }
    /**
     * Classifies the collation element stored for a code point.
     * A supplementary code point is looked up under its lead surrogate.
     *
     * @param ch the code point to classify
     * @return CONTRACTING_CE, EXPANDING_CE or NORMAL_CE for stored entries;
     *         for unsupported entries HANGUL_CE, CJK_CE, CJK_AB_CE or
     *         UNSUPPORTED_CE, depending on the character database tests
     */
    public byte getCEType(int ch) {
        final int unit = ch > 0xFFFF ? UTF16.getLeadSurrogate(ch) : ch; // first if expands
        final int ce = collationElements[unit];

        if (ce == CONTRACTING) {
            return CONTRACTING_CE;
        }
        if (ce != UNSUPPORTED_FLAG) {
            if ((ce & EXPANDING_MASK) == EXPANDING_MASK) {
                return EXPANDING_CE;
            }
            return NORMAL_CE;
        }

        // No stored entry: special check for Hangul and Han
        if (ucd.isHangulSyllable(unit)) {
            return HANGUL_CE;
        }
        if (ucd.isCJK_BASE(unit)) {
            return CJK_CE;
        }
        if (ucd.isCJK_AB(unit)) {
            return CJK_AB_CE;
        }
        return UNSUPPORTED_CE;
    }
    /**
     * Convenience overload: wraps the string in a StringBuffer and delegates
     * to {@link #add(StringBuffer, IntStack)}.
     *
     * @param source the character sequence the collation elements belong to
     * @param ces the collation elements for the sequence
     */
    public void add(String source, IntStack ces) {
        add(new StringBuffer(source), ces);
    }
    /**
     * Registers the collation elements for a character sequence.
     * <p>
     * A single CE is stored directly. Several CEs are appended to the
     * expanding table, followed by TERMINATOR, and represented by
     * EXPANDING_MASK combined with their start index. A one-unit source goes
     * into collationElements unless that slot is already marked CONTRACTING.
     * Longer sources go into the contracting table, and the slot of their
     * first code unit is marked CONTRACTING (moving any ordinary value that
     * was there into the contracting table first).
     *
     * @param source the character sequence; must not be empty
     * @param ces the collation elements; must not be empty
     * @throws IllegalArgumentException if source or ces is empty
     */
    public void add(StringBuffer source, IntStack ces) {
        if (DEBUG_SHOW_ADD) {
            System.out.println("Adding: " + ucd.getCodeAndName(source.toString()) + CEList.toString(ces));
        }
        if (source.length() < 1 || ces.length() < 1) {
            throw new IllegalArgumentException("String or CEs too short");
        }

        // Work out the value that stands for the CE list.
        final int ce;
        if (ces.length() != 1) {
            ce = EXPANDING_MASK | expandingTable.getTop();
            expandingTable.append(ces);
            expandingTable.append(TERMINATOR);
        } else {
            ce = ces.get(0);
        }

        // assign CE(s) to char(s)
        final char first = source.charAt(0);
        final int previous = collationElements[first];

        if (source.length() == 1) {
            if (previous == CONTRACTING) {
                // the slot is already a contraction start, so the single
                // unit has to live in the contracting table as well
                addToContractingTable(source, ce);
            } else {
                collationElements[first] = ce; // normal
            }
            return;
        }

        addToContractingTable(source, ce);
        if (previous == CONTRACTING) {
            return; // already marked
        }
        if (previous != UNSUPPORTED_FLAG) {
            // move old value to contracting table!
            addToContractingTable(String.valueOf(first), previous);
        }
        collationElements[first] = CONTRACTING; // signal we must look up in table
    }
    /**
     * Tells whether the stored collation element for a code point is zero.
     * A supplementary code point is looked up under its lead surrogate; when
     * the slot is marked CONTRACTING, the entry for the code point itself in
     * the contracting table decides.
     *
     * @param cp the code point to test
     * @return true if the element found for cp is 0
     */
    boolean isCompletelyIgnoreable(int cp) {
        final int slot = cp >= UTF16.SUPPLEMENTARY_MIN_VALUE ? UTF16.getLeadSurrogate(cp) : cp;
        final int ce = collationElements[slot];
        if (ce == 0) {
            return true;
        }
        if (ce != CONTRACTING) {
            return false;
        }
        final Integer stored = (Integer) contractingTable.get(UTF16.valueOf(cp));
        return stored != null && stored.intValue() == 0;
    }
    /**
     * Looks up the collation element(s) for a code unit and pushes them onto
     * the result stack.
     * <p>
     * If the slot for ch is marked CONTRACTING, the longest sequence starting
     * with ch that is present in the contracting table is matched against the
     * text in decompositionBuffer from index onwards. Contiguous matches
     * advance the index. Combining marks matched further along are not
     * skipped via the index; their code units in the buffer are overwritten
     * with zero instead. If the resulting value is an expansion, all CEs up
     * to TERMINATOR are copied from the expanding table.
     *
     * @param ch the code unit to look up
     * @param decompositionBuffer the text being processed; may be modified
     *        (matched combining marks are zeroed)
     * @param index position in decompositionBuffer of the code unit after ch
     * @param result stack that receives the collation elements
     * @return the new position in decompositionBuffer
     * @throws IllegalArgumentException if ch is marked CONTRACTING but has no
     *         entry in the contracting table
     */
    public int get(char ch, StringBuffer decompositionBuffer, int index, IntStack result) {
        int ce = collationElements[ch];
        if (ce == CONTRACTING) {
            // Contracting is probably the most interesting (read "tricky") part
            // of the algorithm.
            // First get longest substring that is in the contracting table.
            // For simplicity, contractions are kept in a plain map keyed by string
            // (contractingTable is a TreeMap).
            // There are much better optimizations, 
            // but they take a more complicated build algorithm than we want to show here.
            // NOTE: We are guaranteed that the first code unit is in the contracting table because
            // of the build process.
            String probe = String.valueOf(ch);
            Object value = contractingTable.get(probe);
            if (value == null) throw new IllegalArgumentException("Missing value for " + Utility.hex(ch));
            // complete the first character, if part of supplementary:
            // try the lead surrogate together with the following code unit
            if (UTF16.isLeadSurrogate(ch) && index < decompositionBuffer.length()) {
                char ch2 = decompositionBuffer.charAt(index);
                String newProbe = probe + ch2;
                Object newValue = contractingTable.get(newProbe);
                if (newValue != null) {
                    probe = newProbe;
                    value = newValue;
                    index++;
                }
            }           
            // We loop, trying to add successive code points to the longest substring.
            int cp2;
            while (index < decompositionBuffer.length()) {
                //char ch2 = decompositionBuffer.charAt(index);
                cp2 = UTF16.charAt(decompositionBuffer, index);
                int increment = UTF16.getCharCount(cp2);
                // CHECK if this char is completely ignorable
                // (only active when lessThan410 is set; it is a constant false here)
                if (lessThan410 && isCompletelyIgnoreable(cp2)) {
                    index += increment; // just skip char don't set probe, value
                    continue;
                }
                // see whether the current string plus the next char are in
                // the contracting table.
                String newProbe = probe + UTF16.valueOf(cp2);
                Object newValue = contractingTable.get(newProbe);
                if (newValue == null) break;    // stop if not in table.
                // We succeeded--so update our new values, and advance index
                // to indicate that we swallowed another character.
                probe = newProbe;
                value = newValue;
                index += increment;
            }
            // Now, see if we can add any combining marks
            short lastCan = 0;
            int increment;
            for (int i = index; i < decompositionBuffer.length(); i += increment) {
                // We only take certain characters. They have to be accents,
                // and they have to not be blocked.
                // Unlike above, if we don't find a match (and it was an accent!)
                // then we don't stop, we continue looping.
                cp2 = UTF16.charAt(decompositionBuffer, i);
                increment = UTF16.getCharCount(cp2);
                short can = toD.getCanonicalClass(cp2);
                if (can == 0) break;            // stop with any zero (non-accent)
                if (can == lastCan) continue;   // blocked if same class as last
                lastCan = can;                  // remember for next time
                // CHECK if this char is completely ignorable. If so, skip it.
                if (lessThan410 && isCompletelyIgnoreable(cp2)) {
                    continue;
                }
                // Now see if we can successfully add it onto our string
                // and find it in the contracting table.
                String newProbe = probe + UTF16.valueOf(cp2);
                Object newValue = contractingTable.get(newProbe);
                if (newValue == null) continue;
                // We succeeded--so update our new values and remove the char from
                // the buffer by zeroing it, since index does not move past it.
                probe = newProbe;
                value = newValue;
                decompositionBuffer.setCharAt(i,'\u0000');  // zero char
                if (increment == 2) {
                    // WARNING: we had a supplementary character. zero BOTH parts
                    decompositionBuffer.setCharAt(i+1,'\u0000');  // zero char
                }
            }
            // we are all done, and can extract the CE from the last value set.
            ce = ((Integer)value).intValue();
        }
        // if the CE is not expanding, we are done.
        if ((ce & EXPANDING_MASK) != EXPANDING_MASK) {
            result.push(ce);
        } else {
            // expanding, so copy list of items onto stack
            int ii = ce & EXCEPTION_INDEX_MASK; // get index
            // copy onto stack from index until reach TERMINATOR
            while (true) {
                ce = expandingTable.get(ii++);
                if (ce == TERMINATOR) break;
                result.push(ce);
            }
        }
        return index;
    }
    /**
     * Stores a collation element in the contracting table under the string
     * form of the given key, replacing any earlier entry for that string.
     *
     * @param s the key; its toString() value is used
     * @param ce the collation element to store
     * @throws IllegalArgumentException if s is null
     */
    private void addToContractingTable(Object s, int ce) {
        if (s == null) {
            throw new IllegalArgumentException("String can't be null");
        }
        final String key = s.toString();
        final Integer boxed = new Integer(ce);
        contractingTable.put(key, boxed);
    }
    /**
     * Verifies that the two tables agree on which code units start a
     * contraction: a slot in collationElements is CONTRACTING if and only if
     * some key in the contracting table begins with that code unit.
     * On a mismatch the differences are printed and an exception is thrown.
     * <p>
     * Note that this does not check that the contracting table is closed
     * (that every prefix of a key is also a key); the build has to
     * guarantee that separately.
     *
     * @throws IllegalArgumentException if the tables are inconsistent
     */
    void checkConsistency() {
        // code units whose slot is flagged as a contraction start
        UnicodeSet flagged = new UnicodeSet();
        for (int unit = 0; unit < collationElements.length; ++unit) {
            if (collationElements[unit] == CONTRACTING) flagged.add(unit);
        }

        // first code units of all keys in the contracting table
        UnicodeSet starters = new UnicodeSet();
        for (Iterator keys = contractingTable.keySet().iterator(); keys.hasNext();) {
            String sequence = (String) keys.next();
            starters.add(sequence.charAt(0));
        }

        if (flagged.equals(starters)) {
            return;
        }
        System.out.println("In both: " + new UnicodeSet(flagged).retainAll(starters).toPattern(true));
        System.out.println("CONTRACTING but not in table: " + new UnicodeSet(flagged).removeAll(starters).toPattern(true));
        System.out.println("In table but not CONTRACTING: " + new UnicodeSet(starters).removeAll(flagged).toPattern(true));
        throw new IllegalArgumentException("Inconsistent data");
 /*
 Sample data kept from the original source:
 0FB2 0F71 ; [.124E.0020.0002.0FB2][.125F.0020.0002.0F71] # TIBETAN SUBJOINED LETTER RA + TIBETAN VOWEL SIGN AA
 0FB3 0F71 ; [.1250.0020.0002.0FB3][.125F.0020.0002.0F71] # TIBETAN SUBJOINED LETTER LA + TIBETAN VOWEL SIGN AA
 */
    }
    /**
     * Returns an iterator over the keys (strings) of the contracting table.
     * The iterator comes straight from the table's key set, not from a copy.
     */
    Iterator getContractions() {
        return contractingTable.keySet().iterator();
    }
    /**
     * Returns the number of entries in the contracting table.
     */
    int getContractionCount() {
        return contractingTable.size();
    }
    /**
     * Tells whether the contracting table has a (non-null) entry for the
     * given string.
     *
     * @param s the string to look up
     * @return true if a value is stored for s
     */
    boolean contractionTableContains(String s) {
        return contractingTable.get(s) != null;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA_Types.java
@ -1,98 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Types.java,v $ 
 * $Date: 2005/04/06 08:48:17 $ 
 * $Revision: 1.7 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCA;
 import com.ibm.text.UCD.*;
 import com.ibm.text.utility.*;
 /**
  * Constants shared by the UCA classes.
  * Fields of an interface are implicitly public, static and final, so the
  * modifiers are not repeated on each declaration.
  */
 public interface UCA_Types {
    /**
     * Version of the UCA tables to use
     */
    //private static final String VERSION = "-3.0.1d3"; // ""; // "-2.1.9d7"; 
    //public static final String UCA_BASE = "4.1.0";  // "3.1.1"; //      ; // ""; // "-2.1.9d7"; 
    //public static final String VERSION = "-" + UCA_BASE; //  + "d6" ""; // "-2.1.9d7"; 

    String ALLFILES = "allkeys"; // null if not there

    /** Directory for generated collation output, below UCD_Types.GEN_DIR. */
    String BASE_UCA_GEN_DIR = UCD_Types.GEN_DIR + "collation\\";

    char LEVEL_SEPARATOR = '\u0000';

    /**
     * Expanding characters are marked with an exception bit combination
     * in the collationElement table.
     * This means that they map to more than one CE, which is looked up in
     * the expanding table by index.
     */
    int EXPANDING_MASK = 0xFFFF0000; // marks expanding range start

    /**
     * This mask is used to get the index from an EXPANDING exception.
     * The contracting characters can also make use of this in a future optimization.
     */
    int EXCEPTION_INDEX_MASK = 0x0000FFFF;

    /**
     * Contracting characters are marked with an exception bit combination
     * in the collationElement table.
     * This means that they are the first character of a contraction, and need
     * to be looked up (with following characters) in the contractingTable.<br>
     * This isn't a MASK since there is exactly one value.
     */
    int CONTRACTING = 0xFFFE0000;

    int UNSUPPORTED_FLAG = 0xFFFD0000;

    /**
     * Used to compose Hangul and Han characters
     */
    int NEUTRAL_SECONDARY = 0x20;
    int NEUTRAL_TERTIARY = 0x02;

    /** Enum for alternate handling */
    byte SHIFTED = 0;
    byte ZEROED = 1;
    byte NON_IGNORABLE = 2;
    byte SHIFTED_TRIMMED = 3;
    byte LAST = 3;

    /**
     * Used to terminate a list of CEs
     */
    int TERMINATOR = 0xFFFFFFFF;   // CE that marks end of string

    /**
     * Any unsupported characters (those not in the UCA data tables)
     * are marked with an exception bit combination
     * so that they can be treated specially.<br>
     * There are at least 34 values, so that we can use a range for surrogates.
     * However, we do add to the first weight if we have surrogate pairs!
     */
    int UNSUPPORTED_CJK_BASE = 0xFB40;
    int UNSUPPORTED_CJK_AB_BASE = 0xFB80;
    int UNSUPPORTED_OTHER_BASE = 0xFBC0;
    int UNSUPPORTED_BASE = UNSUPPORTED_CJK_BASE;
    int UNSUPPORTED_LIMIT = UNSUPPORTED_OTHER_BASE + 0x40;

    /**
     *  Special char value that means failed or terminated
     */
    char NOT_A_CHAR = '\uFFFF';

    /**
     * CEType
     */
    byte NORMAL_CE = 0;
    byte CONTRACTING_CE = 1;
    byte EXPANDING_CE = 2;
    byte CJK_CE = 3;
    byte CJK_AB_CE = 4;
    byte HANGUL_CE = 5;
    // SURROGATE_CE = 6, 
    byte UNSUPPORTED_CE = 7;
    // NOTE(review): FIXED_CE has the same value as CJK_CE -- confirm this is intended.
    byte FIXED_CE = 3;
 }
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCharts.java
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
--- a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
--- a/tools/unicodetools/com/ibm/text/UCA/case_help.html
+++ b/tools/unicodetools/com/ibm/text/UCA/case_help.html
@ -1,62 +0,0 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta name="GENERATOR" content="Microsoft FrontPage 4.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <title>Chart Instructions</title>
 <style>
 <!--
 th           { background-color: #eeeeee }
 -->
 </style>
 </head>
 <body>
 <h1>Instructions</h1>
 <p>The Case Charts provide charts of the characters in Unicode that differ from 
 at least one of their case forms (lower, title, upper, or fold).</p>
 <blockquote>
  <p><i>To properly view these charts, your browser should be reasonably recent 
  so it handles Unicode and cascading style sheets, and you should install a 
  Unicode font and configure your browser to use it.</i></p>
 </blockquote>
 <p><b>Notes:</b></p>
 <ul>
  <li>The index pages are ordered by the following:
    <ul>
      <li>By script, unless the script is COMMON or INHERITED</li>
      <li>By general category, in the latter two cases</li>
      <li>If characters have a decomposition containing a cased character, but 
        do not have a case mapping (lower, title, upper, or fold), then they are 
        listed in NoCaseMapping.</li>
    </ul>
  </li>
  <li>Within each chart page, the code points are sorted by lowercased <a href="http://www.unicode.org/unicode/reports/tr15/" target="_top">NFKD</a>, 
    to place related characters next to one another.</li>
  <li>To help pick out cells visually, the more interesting ones have a light 
    blue background. The other cells have grayed-out text.
    <ul>
      <li>The more interesting ones are:
        <ul>
          <li><i>lower: </i>if different than the character</li>
          <li><i>title: </i>if different than upper</li>
          <li><i>upper: </i>if different than the character</li>
          <li><i>fold: </i>if different than lower</li>
        </ul>
      </li>
    </ul>
  </li>
  <li>If your browser supports tool-tips, then hovering your mouse over cells 
    will show the names of the characters.</li>
  <li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr21/" target="_top">UAX 
    #21: Case Mappings</a>.</li>
 </ul>
 </body>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCA/case_index_header.html
+++ b/tools/unicodetools/com/ibm/text/UCA/case_index_header.html
@ -1,35 +0,0 @@
 <!doctype HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
 <meta name="keywords" content="Basic">
 <title>Case Chart</title>
 <style><!--
 p            { font-size: 90% }
 --></style>
 <base target="main">
 <link rel="stylesheet" type="text/css"
 href="http://www.unicode.org/webscripts/standard_styles.css">
 </head>
 <body>
 <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr>
    <td colspan="2">
    <table width="100%" border="0" cellpadding="0" cellspacing="0">
      <tr>
        <td class="icon"><a href="http://www.unicode.org/"><img border="0" 
        src="http://www.unicode.org/webscripts/logo60s2.gif" align="middle" 
        alt="[Unicode]" width="34" height="33"></a>&nbsp;&nbsp;<a class="bar" 
        href="http://www.unicode.org/unicode/faq/"><font size="3">Charts</font></a>
      </tr>
    </table>
    </td>
  </tr>
 </table>
 <div class="body">
 <!-- BEGIN CONTENTS -->
 <h2 align="center">Case Chart</h2>
 <p align="center"><a href="help.html">Help</a>
--- a/tools/unicodetools/com/ibm/text/UCA/charts.css
+++ b/tools/unicodetools/com/ibm/text/UCA/charts.css
@ -1,21 +0,0 @@
 td			{ border: 1 solid #0000FF; color: #000000; background-color: #FFFFFF; 
            font-size: 120%; text-align: Center; vertical-align: top; width: 48px }
 td.p		{ color: #000000; background-color: #7777FF }
 td.s		{ color: #000000; background-color: #BBBBFF }
 td.t		{ color: #000000; background-color: #DDDDFF }
 td.q		{ color: #000000; background-color: #FFFFFF }
 td.ep		{ color: #000000; background-color: #FF5555 }
 td.es		{ color: #000000; background-color: #FF7777 }
 td.et		{ color: #000000; background-color: #FF9999 }
 td.eq		{ color: #000000; background-color: #FFBBBB }
 th			{ vertical-align: top; font-weight: bold }
 th.x		{ vertical-align: top; font-weight: normal; text-align: Left }
 tt			{ font-size: 50% }
 td.name	{ text-align: left; vertical-align: middle; width: 96% }
 body		{ background-color: #FFFFFF; }
 td.g		{ font-size: 120%; text-align: Center; width: 72px; color: #808080; }
 td.n		{ font-size: 120%; text-align: Center; width: 72px; color: #000000; background-color: #CCCCFF; } 
 td.z		{ font-size: 120%; text-align: Center; width: 72px; font-weight: bold; background-color: #EEEEEE; }
 td.h		{ font-size: 120%; text-align: Left; color: #000000; background-color: #EEEEEE; } 
--- a/tools/unicodetools/com/ibm/text/UCA/help.html
+++ b/tools/unicodetools/com/ibm/text/UCA/help.html
@ -1,125 +0,0 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
 <link rel="stylesheet" href="charts.css" type="text/css">
 <meta name="GENERATOR" content="Microsoft FrontPage 4.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <title>UCA Chart Help</title>
 <base target="main">
 </head>
 <body>
 <h2 align="center">UCA Chart Help</h2>
 <p>This set of charts shows the Unicode Collation Algorithm values for Unicode 
 characters. The characters are arranged in the following groups:</p>
 <table cellspacing="0" cellpadding="4">
  <tr>
    <th align="left"><i>Null</i></th>
    <th class="x">Completely ignorable (primary, secondary and tertiary levels)<br>
      These include control codes and various formatting codes.</th>
  </tr>
  <tr>
    <th align="left"><i>Ignorable</i></th>
    <th class="x">Ignorable at a primary level, but not at a secondary or 
      tertiary level.<br>
      These include most accents and diacritics.</th>
  </tr>
  <tr>
    <th align="left"><i>Variable</i></th>
    <th class="x">Characters that may be set to ignorable by a programmatic 
      switch.<br>
      These include spaces, punctuation marks, and most symbols.</th>
  </tr>
  <tr>
    <th align="left"><i>Common</i></th>
    <th class="x">Characters that are none of the above, but not considered 
      letters.<br>
      These include numbers, currency symbols, etc.</th>
  </tr>
  <tr>
    <th align="left"><i>Letters</i></th>
    <th class="x">According to script</th>
  </tr>
  <tr>
    <th align="left"><i>Unsupported</i></th>
    <th class="x">Not explicitly supported in this version of UCA; uses 
      code-point order</th>
  </tr>
 </table>
 <p>The characters* within each group are arranged in cells. The color of the 
 cell indicates the strength of the difference between that character and the <i>previous</i> 
 character in the chart, as follows.</p>
 <table cellspacing="0" cellpadding="4">
  <tr>
    <th colspan="2"><font size="3"><u>No Expansion</u></font>
    <th rowspan="5">&nbsp;
    <th colspan="2"><font size="3"><u>Expansion</u></font>
  </tr>
  <tr>
    <td class="p">a<br>
      <tt>0061</tt></td>
    <th class="x">Primary difference
    <td class="ep">ǳ<br>
      <tt>01F3</tt></td>
    <th class="x">Primary difference</th>
  </tr>
  <tr>
    <td class="s">á<br>
      <tt>00E1</tt></td>
    <th class="x">Secondary Difference</th>
    <td class="es">Ǳ<br>
      <tt>01F1</tt></td>
    <th class="x">Secondary Difference</th>
  </tr>
  <tr>
    <td class="t">A<br>
      <tt>0041</tt></td>
    <th class="x">Tertiary difference</th>
    <td class="et">ǲ<br>
      <tt>01F2</tt></td>
    <th class="x">Tertiary difference</th>
  </tr>
  <tr>
    <td class="q">Å<br>
      <tt>212B</tt></td>
    <th class="x">Quaternary difference<br>
      or no difference</th>
    <td class="eq">&nbsp;</td>
    <th class="x">Quaternary difference<br>
      or no difference</th>
  </tr>
 </table>
 <blockquote>
  <p align="left"><b>Note: </b>If tool-tips are enabled in your browser, then if 
  you pause the mouse over any cell, you will see the name of the character and 
  a representation of the sort key. In this representation, the separators 
  between the weight levels are represented with &quot;|&quot;.</p>
 </blockquote>
 <table>
  <tr>
    <th>*</th>
    <th class="x">In some cases, the UCA data table also includes contractions.<br>
      They can be recognized by the multiple code point numbers, as in the 
      following:</th>
    <td class="p">ஔ<br>
      <tt>0B92 0BD7</tt></td>
  </tr>
 </table>
 <h3><b>Notes</b></h3>
 <ul>
  <li>The UCA results are versioned <i>both</i> by the version of the UCA <i>and</i> 
    by the version of The Unicode Standard used to process the data.</li>
  <li>These charts only provide one of the alternatives for handling variable 
    characters (punctuation), whereby these characters are <b>non-ignorable.</b></li>
  <li>Characters from large blocks, such as CJK-Ideographs, Hangul Syllables, 
    Private Use Area, etc. are represented by a sampling.</li>
  <li>Some unassigned code points, noncharacters and other edge cases are also 
    added to the list for comparison.</li>
  <li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr10/" target="_top">UTS 
    #10: Unicode Collation Algorithm</a>.</li>
 </ul>
 </body>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCA/index.html
+++ b/tools/unicodetools/com/ibm/text/UCA/index.html
@ -1,21 +0,0 @@
 <html>
 <head>
 <title>%%%</title>
 <meta name="GENERATOR" content="Microsoft FrontPage 4.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 </head>
 <frameset cols="192,*">
  <frame name="header" src="index_list.html" target="main" scrolling="auto">
  <frame name="main" src="help.html" target="main" scrolling="auto">
  <noframes>
  <body>
  <p>This page uses frames, but your browser doesn't support them.</p>
  </body>
  </noframes>
 </frameset>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCA/index_header.html
+++ b/tools/unicodetools/com/ibm/text/UCA/index_header.html
@ -1,37 +0,0 @@
 <!doctype HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
 <meta name="keywords" content="Basic">
 <title>%%%</title>
 <style><!--
 p            { font-size: 90%; text-align: Center }
 --></style>
 <link rel="stylesheet" type="text/css"
 href="http://www.unicode.org/webscripts/standard_styles.css">
 <base target='main'>
 </head>
 <body class="navColTable">
 <table width="120%" cellpadding="0" cellspacing="0" border="0">
  <tr>
    <td colspan="2">
    <table width="100%" border="0" cellpadding="0" cellspacing="0">
      <tr>
        <td class="icon"><a href="http://www.unicode.org/" target='_top'><img border="0" 
        src="http://www.unicode.org/webscripts/logo60s2.gif" align="middle" 
        alt="[Unicode]" width="34" height="33"></a>&nbsp;&nbsp;<a class="bar" 
        href="http://www.unicode.org/charts/" target='_top'><font size="3">Charts</font></a>
      </tr>
    </table>
    </td>
  </tr>
  <tr>
    <td colspan="2" class="gray">&nbsp;</td>
  </tr>
 </table>
 <div class="body">
 <!-- BEGIN CONTENTS -->
 <h2 align="center">%%%</h2>
 <p><a href="help.html">Help</a>
--- a/tools/unicodetools/com/ibm/text/UCA/name_help.html
+++ b/tools/unicodetools/com/ibm/text/UCA/name_help.html
@ -1,55 +0,0 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta name="GENERATOR" content="Microsoft FrontPage 4.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
    <meta name="COPYRIGHT" content=
    "Copyright (c) 2002-2006 IBM Corporation and others. All Rights Reserved.">
 <title>Chart Instructions</title>
 <style>
 <!--
 th           { background-color: #eeeeee }
 -->
 </style>
 </head>
 <body>
 <h1>Instructions</h1>
 <p>The Name charts provide an index to Unicode names. Each word in each Unicode 
 character name is extracted, and used as an index for the characters. </p>
 <blockquote>
  <p><i>To properly view these charts, your browser should be reasonably recent 
  so it handles Unicode and cascading style sheets, and you should install a 
  Unicode font and configure your browser to use it.</i></p>
 </blockquote>
 <p><b>Notes:</b></p>
 <ul>
  <li>To keep the charts from becoming too large, a 'stop-list' of words is 
    omitted. These are:
    <ul>
      <li>AND, CAPITAL, CHARACTER, COMPATIBILITY, LETTER, SMALL, WITH</li>
      <li>All script names</li>
      <li>All words containing a digit</li>
      <li>All Hangul Syllables</li>
    </ul>
  </li>
  <li>Unlike some of the other charts, tool-tips to reveal the names are not 
    included (for compactness). However, if you want to know the name of any 
    particular characters:
    <ul>
      <li>Copy the character from the cell.</li>
      <li>Go to <a href="http://demo.icu-project.org/icu-bin/translit">http://demo.icu-project.org/icu-bin/translit</a></li>
      <li>Paste in under <b>Input 1</b></li>
      <li>Select <b>Output 1</b>: Any - Name</li>
    </ul>
  </li>
 </ul>
 </body>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCA/norm_help.html
+++ b/tools/unicodetools/com/ibm/text/UCA/norm_help.html
@ -1,61 +0,0 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta name="GENERATOR" content="Microsoft FrontPage 4.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <title>Chart Instructions</title>
 <style>
 <!--
 th           { background-color: #eeeeee }
 -->
 </style>
 </head>
 <body>
 <h1>Instructions</h1>
 <p>The Normalization Charts provide charts of the characters in Unicode that 
 differ from at least one of their normalization forms (C, D, KC, KD).</p>
 <blockquote>
  <p><i>To properly view these charts, your browser should be reasonably recent 
  so it handles Unicode and cascading style sheets, and you should install a 
  Unicode font and configure your browser to use it.</i></p>
 </blockquote>
 <p><b>Notes:</b></p>
 <ul>
  <li>The index pages are ordered by the following:
    <ul>
      <li>By script, unless the script is COMMON or INHERITED</li>
      <li>By general category, in the latter two cases</li>
    </ul>
  </li>
  <li>Within each chart page, the code points are sorted by folded <a href="http://www.unicode.org/unicode/reports/tr15/" target="_top">NFKD</a>, 
    to place related characters next to one another.</li>
  <li>To keep the size of the Hangul chart manageable, characters U+AD00..U+D6FF 
    (관..훿) are omitted.</li>
  <li>To help pick out cells visually, the more interesting ones have a light 
    blue background. The other cells have grayed-out text.
    <ul>
      <li>The more interesting ones are:
        <ul>
          <li><i>C: </i>if different than the character</li>
          <li><i>D: </i>if different than C</li>
          <li><i>KC: </i>if different than C</li>
          <li><i>KD: </i>if different than KC and D</li>
        </ul>
      </li>
    </ul>
  </li>
  <li>If your browser supports tool-tips, then hovering your mouse over cells 
    will show the names of the characters.</li>
  <li>For more information, see <a href="http://www.unicode.org/unicode/reports/tr15/" target="_top">UAX 
    #15: Unicode Normalization Forms</a>.</li>
 </ul>
 </body>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCA/norm_index_header.html
+++ b/tools/unicodetools/com/ibm/text/UCA/norm_index_header.html
@ -1,35 +0,0 @@
 <!doctype HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
 <meta name="keywords" content="Basic">
 <title>Normalization Chart</title>
 <style><!--
 p            { font-size: 90% }
 --></style>
 <base target="main">
 <link rel="stylesheet" type="text/css"
 href="http://www.unicode.org/webscripts/standard_styles.css">
 </head>
 <body>
 <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr>
    <td colspan="2">
    <table width="100%" border="0" cellpadding="0" cellspacing="0">
      <tr>
        <td class="icon"><a href="http://www.unicode.org/"><img border="0" 
        src="http://www.unicode.org/webscripts/logo60s2.gif" align="middle" 
        alt="[Unicode]" width="34" height="33"></a>&nbsp;&nbsp;<a class="bar" 
        href="http://www.unicode.org/unicode/faq/"><font size="3">Charts</font></a>
      </tr>
    </table>
    </td>
  </tr>
 </table>
 <div class="body">
 <!-- BEGIN CONTENTS -->
 <h2 align="center">Normalization Chart</h2>
 <p align="center"><a href="help.html">Help</a>
--- a/tools/unicodetools/com/ibm/text/UCA/script_help.html
+++ b/tools/unicodetools/com/ibm/text/UCA/script_help.html
@ -1,31 +0,0 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta name="GENERATOR" content="Microsoft FrontPage 4.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <title>Chart Instructions</title>
 <style>
 <!--
 th           { background-color: #eeeeee }
 -->
 </style>
 </head>
 <body>
 <h1>Instructions</h1>
 <p>The Script charts provide an index to Unicode characters by script.</p>
 <blockquote>
  <p><i>To properly view these charts, your browser should be reasonably recent 
  so it handles Unicode and cascading style sheets, and you should install a 
  Unicode font and configure your browser to use it.</i></p>
 </blockquote>
 <p>Where the script = Common, the General Category is used in the index instead.</p>
 </body>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCA/script_index_header.html
+++ b/tools/unicodetools/com/ibm/text/UCA/script_index_header.html
@ -1,35 +0,0 @@
 <!doctype HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"><html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
 <meta name="keywords" content="Basic">
 <title>Script Chart</title>
 <style><!--
 p            { font-size: 90% }
 --></style>
 <base target="main">
 <link rel="stylesheet" type="text/css"
 href="http://www.unicode.org/webscripts/standard_styles.css">
 </head>
 <body>
 <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr>
    <td colspan="2">
    <table width="100%" border="0" cellpadding="0" cellspacing="0">
      <tr>
        <td class="icon"><a href="http://www.unicode.org/"><img border="0" 
        src="http://www.unicode.org/webscripts/logo60s2.gif" align="middle" 
        alt="[Unicode]" width="34" height="33"></a>&nbsp;&nbsp;<a class="bar" 
        href="http://www.unicode.org/unicode/faq/"><font size="3">Charts</font></a>
      </tr>
    </table>
    </td>
  </tr>
 </table>
 <div class="body">
 <!-- BEGIN CONTENTS -->
 <h2 align="center">Script Chart</h2>
 <p align="center"><a href="help.html">Help</a>
--- a/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/BlocksHeader.txt
@ -1,6 +0,0 @@
 #
 # Note:   The casing of block names is not normative.
 #         For example, "Basic Latin" and "BASIC LATIN" are equivalent.
 #
 # Format:
 # Start Code..End Code; Block Name
--- a/tools/unicodetools/com/ibm/text/UCD/BuildNames.java
+++ b/tools/unicodetools/com/ibm/text/UCD/BuildNames.java
@ -1,657 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/BuildNames.java,v $
 * $Date: 2004/03/11 19:03:18 $
 * $Revision: 1.9 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.IOException;
 import com.ibm.icu.text.UTF16;
 //import com.ibm.text.unicode.UInfo;
 import java.util.*;
 import java.io.*;
 //import java.text.*;
 import com.ibm.text.utility.*;
 public class BuildNames implements UCD_Types {
    // Compile-time switch: when true, lookup(), putString() and addString() print trace output.
    static final boolean DEBUG = false;
    /**
     * Command-line entry point; it only runs {@link #collectWords()}.
     *
     * @param args not used
     * @throws IOException propagated from collectWords()
     */
    public static void main(String[] args) throws IOException {
        collectWords();
    }
    // Occurrence tallies (String -> Count) for single words; filled by stash() via addWord().
    static Map words = new TreeMap(new LengthFirstComparator());
    // Tallies for runs of two, three and four consecutive words of one name.
    // Keys are the words joined with "/", e.g. "w1/w2" (see stash()).
    static Map doubleWords = new TreeMap(new LengthFirstComparator());
    static Map tripleWords = new TreeMap(new LengthFirstComparator());
    static Map quadWords = new TreeMap(new LengthFirstComparator());
    // The distinct names collected by collectWords().
    static Set lines = new TreeSet(new LengthFirstComparator());
    // Per-character tallies indexed by char value; only values below 128 fit in the array.
    static int[] letters = new int[128];
    /** Mutable integer cell used as the value type of the word-tally maps. */
    static class Count {
        /** Current tally; callers increment this field directly. */
        int count;

        Count(int initial) {
            count = initial;
        }
    }
    // The three words most recently passed to stash(), newest first:
    // lastWord, then preLastWord, then prePreLastWord. stash() shifts them on every call.
    static String lastWord = "";
    static String preLastWord = "";
    static String prePreLastWord = "";
    /**
     * Adds one to the tally stored for {@code word} in the given map. The first
     * time a word is seen, a fresh {@link Count} is registered for it.
     *
     * @param word  the key to tally
     * @param words map from String to Count that receives the tally; this
     *              parameter hides the static field of the same name
     */
    static void addWord(String word, Map words) {
        Count tally = (Count) words.get(word);
        if (tally != null) {
            tally.count++;
            return;
        }
        Count fresh = new Count(0);
        words.put(word, fresh);
        fresh.count++;
    }
    /**
     * Records one word of a name in the statistics tables.
     *
     * The word is tallied on its own and, depending on how many words of the
     * same name preceded it, as the tail of a 2-, 3- and 4-word sequence. The
     * remembered previous words are then shifted, and every character of the
     * word is tallied in {@code letters}.
     *
     * @param word     the word to record
     * @param position zero-based index of the word within its name
     */
    static void stash(String word, int position) {
        addWord(word, words);

        // Build the "/"-joined keys from the right, prepending one earlier word per level.
        if (position >= 1) {
            String pair = lastWord + "/" + word;
            addWord(pair, doubleWords);
            if (position >= 2) {
                String triple = preLastWord + "/" + pair;
                addWord(triple, tripleWords);
                if (position >= 3) {
                    addWord(prePreLastWord + "/" + triple, quadWords);
                }
            }
        }

        // Shift the history window by one word.
        prePreLastWord = preLastWord;
        preLastWord = lastWord;
        lastWord = word;

        // letters has 128 slots, so a character at or above 128 raises
        // ArrayIndexOutOfBoundsException here.
        char[] chars = word.toCharArray();
        for (int k = 0; k < chars.length; ++k) {
            letters[chars[k]]++;
        }
    }
    /**
     * Rewrites a name into an upper-case, space-separated form:
     * <ul>
     *   <li>'-', '&lt;' and '&gt;' become stand-alone tokens, padded with a space
     *       on either side unless one is already there (no padding at the very
     *       start or end of the text);</li>
     *   <li>lower-case ASCII letters are upper-cased;</li>
     *   <li>each ASCII digit d is replaced by '*' followed by the letter
     *       'A' + d (so 0 becomes "*A" and 9 becomes "*J").</li>
     * </ul>
     *
     * @param line the text to rewrite
     * @return the trimmed rewritten text, or {@code line} itself (untrimmed)
     *         when none of the rules applied
     */
    static String transform(String line) {
        final int length = line.length();
        StringBuffer out = new StringBuffer();
        boolean modified = false;

        for (int pos = 0; pos < length; ++pos) {
            final char ch = line.charAt(pos);

            if (ch == '-' || ch == '<' || ch == '>') {
                final int soFar = out.length();
                if (soFar > 0 && out.charAt(soFar - 1) != ' ') {
                    out.append(' ');
                }
                out.append(ch);
                final int next = pos + 1;
                if (next < length && line.charAt(next) != ' ') {
                    out.append(' ');
                }
                modified = true;
            } else if (ch >= 'a' && ch <= 'z') {
                out.append((char) ('A' + (ch - 'a')));
                modified = true;
            } else if (ch >= '0' && ch <= '9') {
                out.append('*');
                out.append((char) ('A' + (ch - '0')));
                modified = true;
            } else {
                out.append(ch);
            }
        }

        return modified ? out.toString().trim() : line;
    }
    /**
     * Prints the heaviest entries of a word-tally map to System.out, one per
     * line as {@code word:<TAB>weight}, where weight is the occurrence count
     * multiplied by the length of the word.
     *
     * Entries are listed by descending weight. Entries of equal weight are all
     * kept and appear in the iteration order of {@code words}. (Ranking through
     * a map keyed by weight would keep only one word per distinct weight.)
     *
     * @param words map from String to Count; this parameter hides the static
     *              field of the same name
     */
    static void printWords(Map words) {
        System.out.println();
        System.out.println("Finding largest");
        System.out.println();

        // Rank the entries themselves so that equal weights cannot overwrite each other.
        // Collections.sort is stable, which keeps ties in map order.
        ArrayList ranked = new ArrayList(words.entrySet());
        Collections.sort(ranked, new Comparator() {
            public int compare(Object left, Object right) {
                int leftWeight = printWordsWeight((Map.Entry) left);
                int rightWeight = printWordsWeight((Map.Entry) right);
                if (leftWeight == rightWeight) return 0;
                return leftWeight > rightWeight ? -1 : 1; // heavier first
            }
        });

        int counter = 0;
        for (Iterator it = ranked.iterator(); it.hasNext();) {
            // Same cut-off as before: the test runs after the increment, so up to 51 entries print.
            if (counter++ > 50) break;
            Map.Entry entry = (Map.Entry) it.next();
            System.out.println(entry.getKey() + ":\t" + printWordsWeight(entry));
        }
    }

    /** Weight of one (String, Count) entry: occurrence count times word length. */
    private static int printWordsWeight(Map.Entry entry) {
        String word = (String) entry.getKey();
        Count count = (Count) entry.getValue();
        return count.count * word.length();
    }
    /**
     * Scans the code points, writes their short names to "ShortNames.txt",
     * gathers word statistics through {@link #stash(String, int)} and prints
     * a summary plus the heaviest 1- to 4-word sequences.
     *
     * Code points that are not allocated, or whose name is computable, are
     * skipped. The compaction experiment after the {@code if (true) return;}
     * line is deliberately disabled and kept only for reference.
     *
     * @throws IOException if the output file cannot be opened
     */
    static void collectWords() throws IOException {
        String fname = "ShortNames.txt";
        System.out.println("Writing " + fname);
        PrintWriter log = Utility.openPrintWriter(fname, Utility.LATIN1_WINDOWS);
        System.out.println("Gathering data");
        //Counter counter = new Counter();
        String[] parts = new String[100]; // receives the words of one name; presumably 100 is ample - TODO confirm
        //int total = 0;
        int used = 0;
        int sum = 0;
        int longSum = 0;
        try {
            // Inclusive bound: U+10FFFF is the last code point and must be examined too.
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                if (!Default.ucd().isAllocated(cp)) continue;
                if (Default.ucd().hasComputableName(cp)) continue;
                Utility.dot(cp);
                String name;
                if (Default.ucd().isRepresented(cp)) {
                    name = Default.ucd().getName(cp, SHORT);
                    log.println(Utility.hex(cp) + " " + name);
                    // Sanity check: replacing with the abbreviation table must leave the short name unchanged.
                    String backName = Utility.replace(name, UCD_Names.NAME_ABBREVIATIONS, false);
                    if (!name.equals(backName)) {
                        System.out.println("Failed to recreate: " + name + ", " + backName);
                    }
                }
                // check the string, and its decomposition. This is just to get a good count.
                String str = UTF16.valueOf(cp);
                if (false && !Default.nfkd().isNormalized(cp)) { // decomposition currently switched off
                    str += Default.nfkd().normalize(cp);
                }
                int cp2;
                for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp2)) {
                    cp2 = UTF16.charAt(str, i);
                    name = Default.ucd().getName(cp2, SHORT);
                    if (name == null) continue;
                    //name = transform(name);
                    sum += name.length();
                    longSum += Default.ucd().getName(cp2).length();
                    used++;
                    // replace numbers & letters
                    int len = Utility.split(name, ' ', parts);
                    for (int j = 0; j < len; ++j) {
                        stash(parts[j], j);
                    }
                    lines.add(name);
                }
            }
        } finally {
            // Close the log even when the scan fails part-way, so the file handle is not leaked.
            log.close();
        }
        Utility.fixDot();
        //System.out.println("Overhead: " + (lastLink - used) + ", " + ((lastLink - used) * 100 / used) + "%");
        //System.out.println("Strings: " + sum + ", " + (lastLink*4));
        System.out.println("Short Names sum: " + sum + ", average: " + (sum + 0.0)/used);
        System.out.println("Long Names sum: " + longSum + ", average: " + (longSum + 0.0)/used);
        System.out.println("Savings: " + (1 - (sum+0.0)/longSum));
        printWords(words);
        printWords(doubleWords);
        printWords(tripleWords);
        printWords(quadWords);
        // Everything below is intentionally disabled; "if (true)" keeps it compiling.
        if (true) return;
        System.out.println();
        System.out.println("Compacting Words");
        System.out.println();
        Iterator it = words.keySet().iterator();
        int i = 0;
        while (it.hasNext()) {
            String s = (String) it.next();
            int test = CompactName.addWord(s);
            String round = CompactName.stringFromToken(test);
            boolean goesRound = round.equals(s);
            // Report failures only; the detail text is shown when the round trip FAILS,
            // the same condition as in the line loop below.
            if (false || !goesRound) System.out.println("Compacting: '" + s + "': " + i++ + "(" + CompactName.lastToken + ")"
                + (!goesRound ? ": NO RT: '" + round + "'" : ""));
        }
        System.out.println();
        System.out.println("Compacting Lines");
        System.out.println();
        CompactName.startLines();
        it = lines.iterator();
        i = 0;
        while (it.hasNext()) {
            String s = (String) it.next();
            if (s.equals("< BELL >")) {
                System.out.println("DEBUG"); // debugging hook for one specific name
            }
            int test = CompactName.addLine(s);
            String round = CompactName.stringFromToken(test);
            boolean goesRound = round.equals(s);
            if (false || !goesRound) System.out.println("Compacting: '" + s + "': " + i++ + "(" + CompactName.lastToken + ")"
                + (!goesRound ? ": NO RT: '" + round + "'" : ""));
        }
        /*System.out.println("Printing Compact Forms");
        for (int i = 0; i < CompactName.lastToken; ++i) {
            String s = CompactName.stringFromToken(i);
            System.out.println(i + ": '" + s + "'");
        }*/
        System.out.println("Strings: " + sum
            + ", " + (CompactName.spacedMinimum*4)
            + ", " + (CompactName.lastToken*4)
        );
    }
    /*
        Set stuff = new TreeSet();
        for (int i = 0; i < letters.length; ++i) {
            if (letters[i] != 0) {
                stuff.add(new Integer((letters[i] << 8) + i));
            }
        }
        it = stuff.iterator();
        while (it.hasNext()) {
            int in = ((Integer) it.next()).intValue();
            System.out.println((char)(in & 0xFF) + ":\t" + String.valueOf(in >> 8));
        }
            int r = addString(name);
            if (!DEBUG && !rname.equals(name)) {
                System.out.println("\tNo Round Trip: '" + rname + "'");
            }
    */
    // Maps a stored string to the Integer index of its entry in links (written by putString, read by getInt).
    static Map stringToInt = new HashMap();
    // Not referenced by the methods visible in this part of the file.
    static Map intToString = new HashMap();
    // Indexed by char value (0..'Z'); holds the token code of each supported character, 0 for all others.
    static final int[] remap = new int['Z'+1];
    // One more than the highest token code; assigned in the static initializer below.
    static final int maxToken;
    static {
        int counter = 1;
        remap[' '] = counter++;
        remap['-'] = counter++;
        remap['>'] = counter++;
        remap['<'] = counter++;
        for (int i = 'A'; i <= 'Z'; ++i) {
            remap[i] = counter++;
        }
        for (int i = '0'; i <= '9'; ++i) {
            remap[i] = counter++;
        }
        maxToken = counter;
    }
    // Inverse of remap: token code -> one-character string. Code 0 maps to the
    // empty string; codes that remap never hands out stay null.
    static final String[] unmap = new String[maxToken];
    static {
        for (char symbol = 0; symbol < remap.length; ++symbol) {
            final int code = remap[symbol];
            if (code == 0) {
                continue; // character has no token code
            }
            unmap[code] = String.valueOf(symbol);
        }
        unmap[0] = "";
    }
    // Link table: each entry packs a lead token in the upper 16 bits and a trail token
    // in the lower 16 bits (see putString and lookup).
    static int[] links = new int[40000];
    // Not referenced by the methods visible in this part of the file.
    static final int linkStart = 0;
    // Index of the next free entry in links.
    static int lastLink = 0;
    // Token values above this bound (after masking off bit 0x8000) encode a one- or
    // two-character literal instead of an index into links.
    static final int LITERAL_BOUND = 0x7FFF - maxToken * maxToken;
    /**
     * Tells whether a token lies in the literal range, i.e. whether its low
     * 15 bits exceed LITERAL_BOUND. Bit 0x8000 (used by lookup as the
     * trailing-space flag) is ignored. Note that -1, the "not found" result
     * of getInt, also yields true because its low 15 bits are all set.
     *
     * @param i token value
     * @return true if the token is in the literal range
     */
    static boolean isLiteral(int i) {
        final int withoutSpaceFlag = i & 0x7FFF;
        return withoutSpaceFlag > LITERAL_BOUND;
    }
    /**
     * Expands a token back into its string.
     *
     * Bit 0x8000 marks a trailing space that is appended to the expansion.
     * With that bit removed, a value above LITERAL_BOUND decodes directly
     * into two token codes (each mapped through unmap). Any other value is an
     * index into links, whose lead and trail halves are expanded recursively
     * and concatenated.
     *
     * @param i token value, optionally carrying the 0x8000 trailing-space flag
     * @return the string the token stands for
     */
    static String lookup(int i) {
        final boolean spaceFlagged = (i & 0x8000) != 0;
        if (spaceFlagged) {
            i &= ~0x8000; // strip the flag before interpreting the rest
        }

        final String text;
        if (i <= LITERAL_BOUND) {
            final int packed = links[i];
            //if (DEBUG) System.out.println("lead: " + (packed >>> 16) + ", trail: " + (packed & 0xFFFF));
            text = lookup(packed >>> 16) + lookup(packed & 0xFFFF);
        } else {
            i -= LITERAL_BOUND;
            text = unmap[i / maxToken] + unmap[i % maxToken];
        }

        final String result = spaceFlagged ? text + ' ' : text;
        // The trace shows i as left by the decoding above, not the value originally passed in.
        if (DEBUG) System.out.println("token: " + i + " => '" + result + "'");
        return result;
    }
    /**
     * Returns the token for a string without creating anything.
     *
     * The empty string yields 0. Strings of one or two characters are encoded
     * arithmetically as a literal token above LITERAL_BOUND. Longer strings
     * are looked up in stringToInt.
     *
     * @param s the string to resolve; for the short case its characters must
     *          be valid indexes into remap
     * @return the token, or -1 when a string of three or more characters has
     *         not been stored yet
     */
    static int getInt(String s) {
        final int length = s.length();
        if (length == 0) {
            return 0;
        }
        if (length >= 3) {
            Integer known = (Integer) stringToInt.get(s);
            return known == null ? -1 : known.intValue();
        }
        // One or two characters: a missing second character is treated as char value 0.
        final int high = remap[s.charAt(0)];
        final int low = remap[length == 2 ? s.charAt(1) : 0];
        return LITERAL_BOUND + (high * maxToken + low);
    }
    /**
     * Stores a new link for {@code s}: takes the next free slot in links,
     * packs {@code lead} into its upper 16 bits and the low 16 bits of
     * {@code trail} into its lower half, and registers the slot index for
     * {@code s} in stringToInt.
     *
     * @param s     the string being registered; must not already be present
     * @param lead  token for the first part (may carry the 0x8000 flag)
     * @param trail token for the remainder
     * @return the index of the new entry in links
     * @throws IllegalArgumentException if {@code s} was registered before
     */
    static int putString(String s, int lead, int trail) {
        if (stringToInt.get(s) != null) {
            throw new IllegalArgumentException();
        }

        final int slot = lastLink++;
        // The low half of (lead << 16) is zero, so OR-ing in the trail equals adding it.
        links[slot] = (lead << 16) | (trail & 0xFFFF);

        if (DEBUG) {
            System.out.println("'" + s + "', link[" + slot + "] = lead: " + lead + ", trail: " + trail);
            String expanded = lookup(slot);
            if (!expanded.equals(s)) {
                System.out.println("\t*** No Round Trip: '" + expanded + "'");
            }
        }

        stringToInt.put(s, new Integer(slot));
        return slot;
    }
    // Returns the token for s, creating new link entries (through putString) as needed.
    //
    // s cannot have a trailing space. Must be <,>,-,SPACE,0-9,A-Z
    //
    // Strategy, in order:
    //   1. If getInt already knows s (short literal or stored string), return that token.
    //   2. Try every split position; if both halves are already known, store the pair and return.
    //   3. Otherwise, if some split had a half that resolved to a non-literal token, split at the
    //      chosen position and recurse into the other half.
    //   4. Otherwise split after the last space seen, or near the middle, and recurse into both halves.
    // When a split directly follows a space, that space is dropped from the first part and
    // represented by the 0x8000 flag on the lead token instead.
    static int addString(String s) {
        int result = getInt(s);
        if (result != -1) return result;
        int limit = s.length() - 1;
        int bestLen = 0;        // length of the longest non-literal part found at any split
        int best_i = 0;         // split position belonging to bestLen
        int bestSpaceLen = 0;   // same as bestLen, but only for splits that follow a space
        int bestSpace_i = 0;    // NOTE(review): recorded as i + 1, not i - confirm this is intended
        int lastSpace = -1;     // last split position that directly follows a space, -1 if none
        int spaceBits;
        int endOfFirst;
        // invariant. We break after a space if there is one.
        for (int i = 1; i < limit; ++i) {
            char c = s.charAt(i-1); // the character immediately before split position i
            spaceBits = 0;
            endOfFirst = i;
            if (c == ' ') {
                // Drop the space from the first part and flag it on the lead token.
                lastSpace = i;
                endOfFirst--;
                spaceBits = 0x8000;
            }
            String firstPart = s.substring(0, endOfFirst);
            String lastPart = s.substring(i);
            // Leftover debugging hook for one specific prefix.
            if (firstPart.equals("<START OF ")) {
                System.out.println("HUH");
            }
            int lead = getInt(firstPart);
            int trail = getInt(lastPart);
            if (lead >= 0 && trail >= 0) { // if both match, return immediately with pair
                if (DEBUG) System.out.println(s + " => '" + firstPart + (spaceBits != 0 ? "*" : "")
                    + "' # '" + lastPart + "' MATCH BOTH");
                return putString(s, spaceBits | lead, trail);
            }
            // isLiteral is true both for short literal tokens and for getInt's -1 result, so the
            // branches below are entered only for a part whose token is at or below LITERAL_BOUND.
            if (!isLiteral(lead)) {
                if (i > bestLen) {
                    bestLen = i;
                    best_i = i;
                }
                if (i > bestSpaceLen && c == ' ') {
                    bestSpaceLen = i;
                    bestSpace_i = i + 1;
                }
            }
            int end_i = s.length() - i; // length of the second part
            if (!isLiteral(trail)) {
                if (end_i > bestLen) {
                    bestLen = end_i;
                    best_i = i;
                }
                if (end_i > bestSpaceLen && c == ' ') {
                    bestSpaceLen = end_i;
                    bestSpace_i = i + 1;
                }
            }
        }
        // If any split followed a space, only the space-based candidates count from here on.
        if (lastSpace >= 0) {
            bestLen = bestSpaceLen;
            best_i = bestSpace_i;
        }
        spaceBits = 0;
        if (bestLen > 0) { // if one matches, recurse -- and return pair
            endOfFirst = best_i;
            if (lastSpace > 0) {
                --endOfFirst;
                spaceBits = 0x8000;
            }
            String firstPart = s.substring(0, endOfFirst);
            String lastPart = s.substring(best_i);
            int lead = getInt(firstPart);
            int trail = getInt(lastPart);
            if (lead >= 0) {
                if (DEBUG) System.out.println(s + " => '" + firstPart + (spaceBits != 0 ? "*" : "")
                    + "' # '" + lastPart + "' MATCH FIRST");
                return putString(s, spaceBits | lead, addString(lastPart));
            } else {
                // NOTE(review): trail is passed on as-is here; it looks like this relies on
                // lastPart already being known - confirm it can never be -1 on this path.
                if (DEBUG) System.out.println(s + " => '" + firstPart + (spaceBits != 0 ? "*" : "")
                    + "' # '" + lastPart + "' MATCH SECOND");
                return putString(s, spaceBits | addString(firstPart), trail);
            }
        }
        // otherwise, we failed to find anything. Then break before the last word, if there is one
        // otherwise break in the middle (but at even value)
        if (lastSpace >= 0) {
            best_i = lastSpace;
            endOfFirst = lastSpace - 1;
            spaceBits = 0x8000;
        } else {
            endOfFirst = best_i = ((s.length() + 1) / 4) * 2;
        }
        String firstPart = s.substring(0, endOfFirst);
        String lastPart = s.substring(best_i);
        if (DEBUG) System.out.println(s + " => '" + firstPart + (spaceBits != 0 ? "*" : "")
            + "' # '" + lastPart + "' FALLBACK");
        return putString(s, spaceBits | addString(firstPart), addString(lastPart));
    }
    /*
    static int addCompression(String s) {
        Object in = stringToInt.get(s);
        if (in != null) return ((Integer) in).intValue();
        // find best match, recursively
        int bestBreak = -1;
        boolean pickFirst = false;
        for (int i = 1; i < s.length() - 1; ++i) {
            char c = s.charAt(i);
            if (c == ' ' || c == '-') {
                Object pos1 = stringToInt.get(s.substring(0,i+1));
                //Object pos23 = stringToInt.get(s..substring(i));
                    if (pos2 >= 0 && pos3 >= 0) {
                        fullToCompressed.put(value, new Integer(index + reserved));
                        continue main;
                    }
                    if (pos2 >= 0) {
                         if (k > bestBreak) {
                            bestBreak = k;
                            pickFirst = true;
                         }
                    } else if (pos3 >= 0) {
                        if (value.length() - k > bestBreak) {
                            bestBreak = k;
                            pickFirst = false;
                        }
                    }
                }
            }
        }
    }
    static void gatherData() throws IOException {
        System.out.println("Gathering data");
        Counter counter = new Counter();
        String[] parts = new String[100];
        String[] parts2 = new String[100];
        int total = 0;
        for (int i = 0; i < 0x10FFFF; ++i) {
            //if ((i & 0xFF) == 0) System.out.println(Utility.hex(i));
            if (!ucd.isRepresented(i)) continue;
            String s = ucd.getName(i);
            total += s.length();
            int len = Utility.split(s, ' ', parts);
            for (int j = 0; j < len; ++j) {
                if (parts[j].indexOf('-') >= 0) {
                    // hyphen stuff
                    int len2 = Utility.split(parts[j], '-', parts2);
                    for (int k = 0; k < len2; ++k) {
                        if (k == len2 - 1) {
                            counter.add(parts2[k] + '-');
                        } else {
                            counter.add(parts2[k] + " ");
                        }
                    }
                } else {
                   // normal
                    counter.add(parts[j] + " ");
                }
            }
        }
        System.out.println("Sorting data");
        Map m = counter.extract();
        System.out.println("Printing data");
        PrintWriter log = new PrintWriter(
            new BufferedWriter(
            new OutputStreamWriter(
                new FileOutputStream(GEN_DIR + "NameCompression.txt")),
            32*1024));
        log.println("total: " + total);
        Iterator it = m.keySet().iterator();
        String mondo = "";
        int i = 0;
        int strTotal = 0;
        int index = 0;
        Map fullToCompressed = new HashMap();
        String mondoIndex = "";
        main:
        while (it.hasNext()) {
            index++;
            if ((i & 255) == 0) System.out.println("#" + i);
            Counter.RWInteger key = (Counter.RWInteger) it.next();
            String value =  (String)m.get(key);
            log.println(i++ + ": " + key + ": \"" + value + "\"");
            strTotal += value.length();
            // first 128 are the highest frequency, inc. space
            if (index < 128 - SINGLES) {
                mondo += value;
                fullToCompressed.put(value, new String((char)(index + reserved)));
                continue;
            }
            int pos = mondo.indexOf(value);
            if (pos >= 0) {
                // try splitting!
                int bestBreak = -1;
                boolean pickFirst = false;
                if (value.length() > 2) for (int k = 1; k < value.length()-1; ++k) {
                    int pos2 = mondo.indexOf(value.substring(0,k) + " ");
                    int pos3 = mondo.indexOf(value.substring(k));
                    if (pos2 >= 0 && pos3 >= 0) {
                        fullToCompressed.put(value, new Integer(index + reserved));
                        continue main;
                    }
                    if (pos2 >= 0) {
                         if (k > bestBreak) {
                            bestBreak = k;
                            pickFirst = true;
                         }
                    } else if (pos3 >= 0) {
                        if (value.length() - k > bestBreak) {
                            bestBreak = k;
                            pickFirst = false;
                        }
                    }
                }
                if (bestBreak > 0) {
                    if (pickFirst) {
                        mondo += value.substring(bestBreak);
                    } else {
                        mondo += value.substring(0, bestBreak) + " ";
                    }
                } else {
                    mondo += value;
                }
            }
            // high bit on, means 2 bytes, look in array
        }
        log.println("strTotal: " + strTotal);
        log.println("mondo: " + mondo.length());
        int k = 80;
        for (; k < mondo.length(); k += 80) {
            log.println(mondo.substring(k-80, k));
        }
        log.println(mondo.substring(k-80)); // last line
        log.close();
    }
    static int indexOf(StringBuffer target, String source) {
        int targetLen = target.length() - source.length();
        main:
        for (int i = 0; i <= targetLen; ++i) {
            for (int j = 0; j < source.length(); ++j) {
                if (target.charAt(i) != source.charAt(j)) continue main;
            }
            return i;
        }
        return -1;
    }
    static final int SINGLES = 26 + 10 + 2;
    */
    /*
    static String decode(int x) {
        if (x < SINGLES) {
            if (x < 26) return String.valueOf(x + 'A');
            if (x < 36) return String.valueOf(x - 26 + '0');
            if (x == 36) return "-";
            return " ";
        }
        if (x < binaryLimit) {
            x =
    */
 }
--- a/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt
@ -1,47 +0,0 @@
 #
 # Case Folding Properties
 #
 # This file is a supplement to the UnicodeData file.
 # It provides a case folding mapping generated from the Unicode Character Database.
 # If all characters are mapped according to the full mapping below, then
 # case differences (according to UnicodeData.txt and SpecialCasing.txt)
 # are eliminated.
 #
 # The data supports both implementations that require simple case foldings
 # (where string lengths don't change), and implementations that allow full case folding
 # (where string lengths may grow). Note that where they can be supported, the
 # full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
 #
 # All code points not listed in this file map to themselves.
 #
 # NOTE: case folding does not preserve normalization forms!
 #
 # For information on case folding, see
 # UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
 #
 # ================================================================================
 # Format
 # ================================================================================
 # The entries in this file are in the following machine-readable format:
 #
 # <code>; <status>; <mapping>; # <name>
 #
 # The status field is:
 # C: common case folding, common mappings shared by both simple and full mappings.
 # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
 # S: simple case folding, mappings to single characters where different from F.
 # T: special case for uppercase I and dotted uppercase I
 #    - For non-Turkic languages, this mapping is normally not used.
 #    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
 #      Note that the Turkic mappings do not maintain canonical equivalence without additional processing.
 #      See the discussions of case mapping in the Unicode Standard for more information.
 #
 # Usage:
 #  A. To do a simple case folding, use the mappings with status C + S.
 #  B. To do a full case folding, use the mappings with status C + F.
 #
 #    The mappings with status T can be used or omitted depending on the desired case-folding
 #    behavior. (The default option is to exclude them.)
 #
 # =================================================================
--- a/tools/unicodetools/com/ibm/text/UCD/CaseTestHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/CaseTestHeader.txt
@ -1,47 +0,0 @@
 #
 # This file is used to test (1) case conversion, (2) case detection, 
 # and (3) case-insensitive matching.
 # (1) is represented below by function names such as toLower(),
 # (2) is represented below by function names such as isLower().
 # (3) is represented below by the function name equalsCaseInsensitive().
 # (The actual function names will vary depending on software language and/or library.)
 #
 # The test cases also check whether canonical equivalence is preserved
 # by these functions.
 #
 # Format:
 # <src> ; <lower> ; <upper> ; <title> ; <fold> (# <comment>)? 
 #
 # Test:
 #
 # A. For each line:
 #    1. Verify the following equalities:
 #           lower == toLower(src)
 #           upper == toUpper(src)
 #           title == toTitle(src)
 #           fold == toFold(src)
 #    2. Verify that all of the following are true:
 #           isLower(toLower(lower))
 #           isUpper(toUpper(upper))
 #           isTitle(toTitle(title))
 #           isFold(toFold(fold))
 #    3. Verify that all of the following are true:
 #           equalsCaseInsensitive(src, lower)
 #           equalsCaseInsensitive(src, upper)
 #           equalsCaseInsensitive(src, title)
 #           equalsCaseInsensitive(src, fold)
 #
 # B. For each code point that is NOT listed as a src:
 #    1. Verify the following equalities:
 #           src == toLower(src) == toUpper(src) == toTitle(src) == toFold(src)
 #    2. Verify that all of the following are true:
 #           isLower(toLower(lower))
 #           isUpper(toUpper(upper))
 #           isTitle(toTitle(title))
 #           isFold(toFold(fold))
 #    3. Verify that all of the following are true:
 #           equalsCaseInsensitive(src, lower)
 #           equalsCaseInsensitive(src, upper)
 #           equalsCaseInsensitive(src, title)
 #           equalsCaseInsensitive(src, fold)
 #
--- a/tools/unicodetools/com/ibm/text/UCD/Charts.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Charts.java
@ -1,25 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Charts.java,v $
 * $Date: 2002/10/05 01:28:58 $
 * $Revision: 1.1 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import com.ibm.icu.text.UnicodeSet;
 import java.io.*;
 import java.util.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.text.utility.*;
 /**
 * Empty placeholder class: it declares no fields, constructors or methods.
 */
 public class Charts {
 }
--- a/tools/unicodetools/com/ibm/text/UCD/CheckCollator.java
+++ b/tools/unicodetools/com/ibm/text/UCD/CheckCollator.java
@ -1,351 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CheckCollator.java,v $
 * $Date: 2002/08/09 23:56:24 $
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */
 // http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html
 package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;
 import java.text.NumberFormat;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 /**
 * This is a quick and dirty program to get some idea of collation performance, comparing
 * the JDK collator (java.text.Collator, reported as "old") with the ICU collator
 * (com.ibm.icu.text.Collator, reported as "new").
 * For each data file it prints the average sort-key size, the sort-key creation time,
 * optionally the raw compare time, the binary-search time and the sort time.
 * All times are printed in microseconds per iteration, with a measured loop overhead subtracted.
 */
 abstract public class CheckCollator {
    /** Path prefix of the test-data files; the data-set name and ".txt" are appended to it. */
    static final String PREFIX = "C:\\ICUInternal\\icu4c\\collation-perf-data\\TestNames_";
    /** When true, the all-pairs raw compare benchmark is run as well. */
    static final boolean DO_RAW = false;
    /** Formatter for sizes and times (at most two fraction digits, see the static initializer). */
    static final NumberFormat nf = NumberFormat.getInstance();
    /** Formatter for the new/old ratios. */
    static final NumberFormat percent = NumberFormat.getPercentInstance();
    static {
        nf.setMaximumFractionDigits(2);
    }
    /**
     * Runs the benchmark for a fixed list of locale / data-file pairs.
     * @param args not used
     * @throws IOException if a data file cannot be read
     */
    public static void main(String[] args) throws IOException {
        // later, drive off of args
        // choices are: Asian, Chinese, Japanese, Japanese_h, Japanese_k, Korean, Latin, Russian, Thai
        //test(Locale.KOREAN, "Korean");
        test(Locale.ENGLISH, "Latin");
        test(Locale.FRENCH, "Latin");
        test(Locale.JAPANESE, "Japanese");
    }
    /**
     * Benchmarks the old and new collators for one locale against one data file and
     * prints the results to System.out.
     * @param loc locale for which both collators are created
     * @param name data-set name; the file read is PREFIX + name + ".txt"
     * @throws IOException if the data file cannot be opened or read
     */
    public static void test(Locale loc, String name) throws IOException {
        System.out.println();
        System.out.println("Testing " + loc.getDisplayName() + ", file: " + name);
        System.out.println();
        // get test data
        int limit = 800; // put a limit on it to save time
        String[] tests = readTests(PREFIX + name + ".txt", limit);
        int size = tests.length;
        if (size == 0) {
            // nothing to measure; every average below would divide by zero
            System.out.println("No test data; skipping");
            return;
        }
        // later, adjust these so we always get a reasonable number of tries
        int extraIterations = 200;
        // get collators
        com.ibm.icu.text.Collator newCol = com.ibm.icu.text.Collator.getInstance(loc);
        java.text.Collator oldCol = java.text.Collator.getInstance(loc);
        double startTime, endTime;
        double delta, oldDelta;
        String probe;
        int counter;
        // load classes at least once before starting
        newCol.compare("a", "b");
        oldCol.compare("a", "b");
        // ================================================
        // check sort key size
        int stringSize = 0, newSize = 0, oldSize = 0;
        for (int i = 0; i < size; ++i) {
            stringSize += tests[i].length() * 2; // two bytes per UTF-16 code unit
            byte[] newKey = newCol.getCollationKey(tests[i]).toByteArray();
            newSize += newKey.length;
            byte[] oldKey = oldCol.getCollationKey(tests[i]).toByteArray();
            oldSize += oldKey.length;
        }
        delta = stringSize/(size + 0.0);
        System.out.println("string size: " + nf.format(delta) + " bytes per key");
        System.out.println();
        delta = oldDelta = (oldSize/(size + 0.0));
        System.out.println("old sortkey size: " + nf.format(delta) + " bytes per key ");
        delta = (newSize/(size + 0.0));
        System.out.println("new sortkey size: " + nf.format(delta) + " bytes per key " + percent.format(delta/oldDelta));
        System.out.println();
        // ================================================
        // Sort Key: old time
        // get overhead time
        counter = 0;
        startTime = System.currentTimeMillis();
        for (int i = 0; i < size; ++i) {
            for (int j = 0; j < size; ++j) {
                counter++;
            }
        }
        endTime = System.currentTimeMillis();
        double overhead = microsPer(startTime, endTime, counter);
        // print the value that is actually subtracted below, in the unit the label names
        System.out.println("overhead: " + nf.format(overhead) + " micros");
        counter = 0;
        startTime = System.currentTimeMillis();
        for (int i = 0; i < size; ++i) {
            probe = tests[i];
            for (int k = 0; k < extraIterations; ++k) {
                oldCol.getCollationKey(probe);
                counter++;
            }
        }
        endTime = System.currentTimeMillis();
        oldDelta = delta = microsPer(startTime, endTime, counter) - overhead;
        System.out.println("Old sort key time: " + nf.format(delta)
            + " micros (" + counter + " iterations)");
        // Sort Key: new time
        counter = 0;
        startTime = System.currentTimeMillis();
        for (int i = 0; i < size; ++i) {
            probe = tests[i];
            for (int k = 0; k < extraIterations; ++k) {
                newCol.getCollationKey(probe);
                counter++;
            }
        }
        endTime = System.currentTimeMillis();
        delta = microsPer(startTime, endTime, counter) - overhead;
        System.out.println("New sort key time: " + nf.format(delta)
            + " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
        System.out.println();
        // ================================================
        // Raw Compare
        if (DO_RAW) {
            // get overhead time
            counter = 0;
            startTime = System.currentTimeMillis();
            int opt = 0; // to keep the compiler from optimizing out
            for (int i = 0; i < size; ++i) {
                probe = tests[i];
                for (int j = 0; j < size; ++j) {
                    opt ^= probe.compareTo(tests[j]);
                    counter++;
                }
            }
            endTime = System.currentTimeMillis();
            overhead = microsPer(startTime, endTime, counter);
            System.out.println("overhead: " + nf.format(overhead) + " micros");
            // Raw Compare: old time
            counter = 0;
            startTime = System.currentTimeMillis();
            for (int i = 0; i < size; ++i) {
                probe = tests[i];
                for (int j = 0; j < size; ++j) {
                    opt ^= oldCol.compare(probe, tests[j]);
                    counter++;
                }
            }
            endTime = System.currentTimeMillis();
            oldDelta = delta = microsPer(startTime, endTime, counter) - overhead;
            System.out.println("Old raw compare time: " + nf.format(delta)
                + " micros (" + counter + " iterations)");
            // Raw Compare: new time
            counter = 0;
            startTime = System.currentTimeMillis();
            for (int i = 0; i < size; ++i) {
                probe = tests[i];
                for (int j = 0; j < size; ++j) {
                    opt ^= newCol.compare(probe, tests[j]);
                    counter++;
                }
            }
            endTime = System.currentTimeMillis();
            delta = microsPer(startTime, endTime, counter) - overhead;
            System.out.println("New raw compare time: " + nf.format(delta)
                + " micros (" + counter + " iterations) " + percent.format(delta/oldDelta));
            System.out.println();
        }
        // ================================================
        // Binary Search
        // note: I don't worry about getting the binary search precisely right, since I just want to
        // see which strings would get compared.
        // In all three measurements the array is sorted BEFORE the clock starts, so that
        // only the searches are timed and the old/new figures are comparable.
        // overhead
        int iterations = (size * extraIterations);
        Arrays.sort(tests);
        startTime = System.currentTimeMillis();
        int opt2 = 0; // keep from optimizing out
        for (int i = 0; i < size; ++i) {
            probe = tests[i];
            for (int k = 0; k < extraIterations; ++k) {
                opt2 ^= Arrays.binarySearch(tests, probe);
            }
        }
        endTime = System.currentTimeMillis();
        overhead = delta = microsPer(startTime, endTime, iterations);
        System.out.println("Overhead: " + nf.format(delta)
            + " micros (" + iterations + " iterations)");
        // old time
        Arrays.sort(tests, oldCol);
        startTime = System.currentTimeMillis();
        for (int i = 0; i < size; ++i) {
            probe = tests[i];
            for (int k = 0; k < extraIterations; ++k) {
                opt2 ^= Arrays.binarySearch(tests, probe, oldCol);
            }
        }
        endTime = System.currentTimeMillis();
        oldDelta = delta = microsPer(startTime, endTime, iterations) - overhead;
        System.out.println("Old binary search time: " + nf.format(delta)
            + " micros (" + iterations + " iterations)");
        // new time
        Arrays.sort(tests, newCol);
        startTime = System.currentTimeMillis();
        for (int i = 0; i < size; ++i) {
            probe = tests[i];
            for (int k = 0; k < extraIterations; ++k) {
                opt2 ^= Arrays.binarySearch(tests, probe, newCol);
            }
        }
        endTime = System.currentTimeMillis();
        delta = microsPer(startTime, endTime, iterations) - overhead;
        System.out.println("New binary search time: " + nf.format(delta)
            + " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));
        System.out.println();
        // ================================================
        // Sort
        String[] sortTests = (String[]) tests.clone();
        extraIterations = 5;
        iterations = (size * extraIterations);
        // overhead
        startTime = System.currentTimeMillis();
        for (int i = 0; i < size; ++i) {
            for (int k = 0; k < extraIterations; ++k) {
                System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
                Arrays.sort(sortTests);
            }
        }
        endTime = System.currentTimeMillis();
        overhead = delta = microsPer(startTime, endTime, iterations);
        System.out.println("overhead: " + nf.format(delta)
            + " micros (" + iterations + " iterations)");
        // old time
        startTime = System.currentTimeMillis();
        for (int i = 0; i < size; ++i) {
            for (int k = 0; k < extraIterations; ++k) {
                System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
                Arrays.sort(sortTests, oldCol);
            }
        }
        endTime = System.currentTimeMillis();
        oldDelta = delta = microsPer(startTime, endTime, iterations) - overhead;
        System.out.println("Old sort time: " + nf.format(delta)
            + " micros (" + iterations + " iterations)");
        // new time
        startTime = System.currentTimeMillis();
        for (int i = 0; i < size; ++i) {
            for (int k = 0; k < extraIterations; ++k) {
                System.arraycopy(tests, 0, sortTests, 0, tests.length); // copy array
                Arrays.sort(sortTests, newCol);
            }
        }
        endTime = System.currentTimeMillis();
        delta = microsPer(startTime, endTime, iterations) - overhead;
        System.out.println("New sort time: " + nf.format(delta)
            + " micros (" + iterations + " iterations) " + percent.format(delta/oldDelta));
    }
    /**
     * Reads the non-empty data lines of a file and returns exactly <code>limit</code> of them,
     * repeating the lines if the file has fewer. Prints the number of lines read.
     * @param fileName file to read; decoded with the "UnicodeLittle" charset
     * @param limit number of strings to return
     * @return the test strings, or an empty array if the file has no non-empty data lines
     * @throws IOException if the file cannot be opened or read
     */
    private static String[] readTests(String fileName, int limit) throws IOException {
        ArrayList list = new ArrayList();
        int counter = 0;
        FileInputStream fis = new FileInputStream(fileName);
        try {
            InputStreamReader isr = new InputStreamReader(fis, "UnicodeLittle");
            BufferedReader br = new BufferedReader(isr, 32*1024);
            while (true) {
                String line = Utility.readDataLine(br);
                if (line == null) break;
                if (line.length() == 0) continue;
                Utility.dot(counter++);
                list.add(line);
            }
        } finally {
            // release the file handle even when reading fails
            fis.close();
        }
        System.out.println("Read " + counter + " lines in file");
        // doubling an empty list never reaches the limit, so bail out instead of looping forever
        if (list.isEmpty()) return new String[0];
        // pump it up if there aren't very many
        while (list.size() < limit) {
            list.addAll(list);
        }
        int size = Math.min(list.size(), limit);
        return (String[]) list.subList(0, size).toArray(new String[size]);
    }
    /**
     * Converts an elapsed time between two System.currentTimeMillis() readings into
     * microseconds per iteration.
     * @param startTime reading taken before the timed loop, in milliseconds
     * @param endTime reading taken after the timed loop, in milliseconds
     * @param count number of iterations the loop performed
     * @return elapsed microseconds divided by count
     */
    private static double microsPer(double startTime, double endTime, int count) {
        return 1000*(endTime - startTime) / count;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/CheckICU.java
+++ b/tools/unicodetools/com/ibm/text/UCD/CheckICU.java
@ -1,327 +0,0 @@
 package com.ibm.text.UCD;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.UnicodeLabel;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
 import com.ibm.icu.dev.test.util.ICUPropertyFactory;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.ULocale;
 import com.ibm.text.utility.Utility;
 public class CheckICU {
    static final BagFormatter bf = new BagFormatter();
    /**
     * Command-line entry point: prints "Start", runs test(), then prints "End".
     * @param args not used
     * @throws IOException propagated from test()
     */
    public static void main(String[] args) throws IOException {
        System.out.println("Start");
        test();
        System.out.println("End");
    }
    static UnicodeSet itemFailures;
    static ICUPropertyFactory icuFactory;
    static ToolUnicodePropertySource toolFactory;
    /**
     * UnicodeLabel that forwards to a wrapped UnicodeProperty and returns its values
     * with every underscore replaced by a space.
     */
    static class ReplaceLabel extends UnicodeLabel {
        /** The property whose values and widths are reported. */
        UnicodeProperty p;
        ReplaceLabel(UnicodeProperty wrapped) {
            p = wrapped;
        }
        /** Returns the wrapped property's value for the code point, with '_' turned into ' '. */
        public String getValue(int codepoint, boolean isShort) {
            String raw = p.getValue(codepoint, isShort);
            return raw.replace('_', ' ');
        }
        /** Returns the wrapped property's maximum width unchanged. */
        public int getMaxWidth(boolean v) {
            int width = p.getMaxWidth(v);
            return width;
        }
    }
    /**
     * Runs the checks of this class. As written, only checkAvailable() is executed:
     * the unconditional return right after it disables everything that follows
     * (the UCD check and the ICU-versus-tool property comparison).
     * @throws IOException declared for the disabled checkUCD() call
     */
    public static void test() throws IOException {
        checkAvailable();
        // Spelled "if (true)" so the compiler does not reject the rest as unreachable.
        if (true) return;
        checkUCD();
        itemFailures = new UnicodeSet();
        icuFactory = ICUPropertyFactory.make();
        toolFactory = ToolUnicodePropertySource.make("4.0.0");
        // Properties to test on their own; when non-empty, the full comparison is skipped.
        String[] quickList = {
            // "Canonical_Combining_Class",
            // "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
            //"Numeric_Value"
        };
        int quickCount = quickList.length;
        for (int q = 0; q < quickCount; ++q) {
            testProperty(quickList[q], -1);
        }
        if (quickCount > 0) {
            return;
        }
        Collection toolNames = toolFactory.getAvailableNames();
        Collection icuNames = icuFactory.getAvailableNames();
        System.out.println(showDifferences("Property Aliases", "ICU", icuNames, "Tool", toolNames));
        // Only properties known to both sources are compared, in sorted name order.
        Collection shared = new TreeSet(icuNames);
        shared.retainAll(toolNames);
        for (int type = UnicodeProperty.BINARY; type < UnicodeProperty.LIMIT_TYPE; ++type) {
            System.out.println();
            System.out.println(UnicodeProperty.getTypeName(type));
            for (Iterator names = shared.iterator(); names.hasNext();) {
                testProperty((String) names.next(), type);
            }
        }
    }
    /**
     * Prints every collation keyword=value pair reported by Collator, then calls
     * showCollationVariants for each available collator locale whose name is one of
     * "nl", "de", "de_DE" or "zh_TW".
     */
    private static void checkAvailable() {
        //generateFile("4.0.0", "DerivedCombiningClass");
        //generateFile("4.0.0", "DerivedCoreProperties");
        ULocale[] locales = Collator.getAvailableULocales();
        System.out.println("Collation");
        System.out.println("Possible keyword=values pairs:");
        // Fetch the keyword list once instead of re-querying it in the loop condition.
        String[] keywords = Collator.getKeywords();
        for (int i = 0; i < keywords.length; ++i) {
            String[] values = Collator.getKeywordValues(keywords[i]);
            for (int j = 0; j < values.length; ++j) {
                System.out.println("\t" + keywords[i] + "=" + values[j]);
            }
        }
        System.out.println("Differing Collators:");
        Set testSet = new HashSet(Arrays.asList(new String[] {
            "nl", "de", "de_DE", "zh_TW"
        }));
        for (int k = 0; k < locales.length; ++k) {
            if (!testSet.contains(locales[k].toString())) continue;
            showCollationVariants(locales[k]);
        }
    }
 	/**
 	 * Verbose variant dump for one locale. For every collation keyword it prints the
 	 * functional-equivalent locale of the plain locale, then, for every value of that
 	 * keyword, the functional equivalent of locale@keyword=value, followed by a
 	 * tab-separated line whenever the two equivalents differ.
 	 * @param locale the locale to examine
 	 */
 	private static void showCollationVariants(ULocale locale) {
 		String[] keywords = Collator.getKeywords();
 		System.out.println(locale.getDisplayName(ULocale.ENGLISH) + " [" + locale + "]");
 		// keywords.length / values.length are used as loop bounds; the arrays are
 		// already in hand, so Collator is not re-queried on every iteration.
 		for (int i = 0; i < keywords.length; ++i) {
 			ULocale base = Collator.getFunctionalEquivalent(keywords[i], 
 					locale
 					//new ULocale(locale + "@" + keywords[i] + "=standard")
 					);
 			if (true) System.out.println("\"" + base + "\" == Collator.getFunctionalEquivalent(\"" + keywords[i] + "\", \"" + locale + "\");");
 			String[] values = Collator.getKeywordValues(keywords[i]);
 			for (int j = 0; j < values.length; ++j) {
 				ULocale other = Collator.getFunctionalEquivalent(keywords[i], 
 						new ULocale(locale + "@" + keywords[i] + "=" + values[j]));
 				if (true) System.out.println(
 						"\"" + other
 						+ "\" == Collator.getFunctionalEquivalent(\"" + keywords[i]
 						+ "\", new ULocale(\""
 						+ locale + "@" + keywords[i] + "=" + values[j] + "\");");
 				// HACK: commented line should work but doesn't
 				if (!other.equals(base)) {
 				//if (other.toString().indexOf("@") >= 0) {
 					System.out.println("\t" + keywords[i] + "=" + values[j] + "; \t" + base + "; \t" + other);
 				}
 			}
 		}
 	}
 /**
 * Sample code that prints out the variants that 'make a difference' for a given locale:
 * for every collation keyword and every value of that keyword, a line is printed when the
 * functional equivalent of locale@keyword=value differs from that of the plain locale.
 * To iterate through the locales, see checkAvailable(), which takes them from
 * Collator.getAvailableULocales().
 * @param locale the locale to examine
 */
 private static void showCollationVariants2(ULocale locale) {
 	String[] keywords = Collator.getKeywords();
 	System.out.println(locale.getDisplayName(ULocale.ENGLISH) + " [" + locale + "]");
 	// Loop bounds come from the arrays already fetched rather than from repeated Collator calls.
 	for (int i = 0; i < keywords.length; ++i) {
 		ULocale base = Collator.getFunctionalEquivalent(keywords[i], locale);
 		String[] values = Collator.getKeywordValues(keywords[i]);
 		for (int j = 0; j < values.length; ++j) {
 			ULocale other = Collator.getFunctionalEquivalent(keywords[i], 
 					new ULocale(locale + "@" + keywords[i] + "=" + values[j]));
 			if (!other.equals(base)) {
 				System.out.println("\t" + keywords[i] + "=" + values[j] + "; \t" + base + "; \t" + other);
 			}
 		}
 	}
 }
 	/**
 	 * Classifies every code point 0..0x10FFFF of UCD version 4.0.0 by three tests:
 	 * combining class 0 ("starter"), nfc.isTrailing and nfc.isLeading. It then writes
 	 * five labelled combinations of those sets to Trailing.txt in UCD_Types.GEN_DIR.
 	 * @throws IOException declared for the writer opened via bf.openUTF8Writer
 	 */
 	private static void checkUCD() throws IOException {
        UCD myUCD = UCD.make("4.0.0");
        Normalizer nfc = new Normalizer(Normalizer.NFC, "4.0.0");
        UnicodeSet leading = new UnicodeSet();
        UnicodeSet trailing = new UnicodeSet();
        UnicodeSet starter = new UnicodeSet();
        for (int i = 0; i <= 0x10FFFF; ++i) {
            if (myUCD.getCombiningClass(i) == 0) starter.add(i);
            if (nfc.isTrailing(i)) trailing.add(i);
            if (nfc.isLeading(i)) leading.add(i);
        }
        PrintWriter pw = bf.openUTF8Writer(UCD_Types.GEN_DIR, "Trailing.txt");
        try {
            // Each section works on a fresh copy so the base sets stay unmodified.
            pw.println("+Trailing+Starter");
            bf.showSetNames(pw,  new UnicodeSet(trailing).retainAll(starter));
            pw.println("+Trailing-Starter");
            bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(starter));
            pw.println("-Trailing-Starter");
            bf.showSetNames(pw, new UnicodeSet(trailing).complement().removeAll(starter));
            pw.println("+Trailing+Leading");
            bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(leading));
            pw.println("+Trailing-Leading");
            bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(leading));
        } finally {
            // close the file even if one of the dumps throws
            pw.close();
        }
    }
    /*
     *                 int icuType;
                int toolType;
                Collection icuAliases;
                Collection toolAliases;
                String firstDiffICU;
                String firstDiffTool;
                String firstDiffCP;
                String icuProp;
                String toolProp;
     */
    private static void testProperty(String prop, int typeFilter) {
        UnicodeProperty icuProp = icuFactory.getProperty(prop);
        int icuType = icuProp.getType();
        if (typeFilter >= 0 && icuType != typeFilter) return;
        System.out.println();
        System.out.println("Testing: " + prop);
        UnicodeProperty toolProp = toolFactory.getProperty(prop);
        int toolType = toolProp.getType();
        if (icuType != toolType) {
            System.out.println("FAILURE Type: ICU: " + UnicodeProperty.getTypeName(icuType)
                + "\tTool: " + UnicodeProperty.getTypeName(toolType));
        }
        Collection icuAliases = icuProp.getNameAliases(new ArrayList());
        Collection toolAliases = toolProp.getNameAliases(new ArrayList());
        System.out.println(showDifferences("Aliases", "ICU", icuAliases, "Tool", toolAliases));
        icuAliases = icuProp.getAvailableValues(new ArrayList());
        toolAliases = toolProp.getAvailableValues(new ArrayList());
        System.out.println(showDifferences("Value Aliases", "ICU", icuAliases, "Tool", toolAliases));
        // TODO do property value aliases
        itemFailures.clear();
        String firstDiffICU = null, firstDiffTool = null, firstDiffCP = null;
        for (int i = 0; i <= 0x10FFFF; ++i) {
            /*if (i == 0x0237) {
                System.out.println();
            }
            */
            String icuValue = icuProp.getValue(i);
            String toolValue = toolProp.getValue(i);
            if (!equals(icuValue, toolValue)) {
                itemFailures.add(i);
                if (firstDiffCP == null) {
                    firstDiffICU = icuValue;
                    firstDiffTool = toolValue;
                    firstDiffCP = Utility.hex(i);
                }
            }
        }
        if (itemFailures.size() != 0) {
            System.out.println("FAILURE " + itemFailures.size() + " Differences: ");
            System.out.println(itemFailures.toPattern(true));
            if (firstDiffICU != null) firstDiffICU = bf.hex.transliterate(firstDiffICU);
            if (firstDiffTool != null) firstDiffTool = bf.hex.transliterate(firstDiffTool);
            System.out.println(firstDiffCP 
                + "\tICU: <" + firstDiffICU
                + ">\tTool: <" + firstDiffTool + ">");
        }
        System.out.println("done"); 
        // do values later, and their aliases
        /*
        System.out.println("-Values");
        UnicodeSet
        System.out.println(showDifferences("ICU", availableICU, "Tool", availableTool));
        */
    }
    static boolean equals(Object a, Object b) {
        if (a == null) return b == null;
        return a.equals(b);
    }
    static public String showDifferences(
        String title,
        String name1,
        Collection set1,
        String name2,
        Collection set2) {
        Collection temp = new TreeSet(set1);
        temp.retainAll(set2);
        if (set1.size() == temp.size()) {
            return title + ": " + name1 + " == " + name2 + ": " + bf.join(set1);
        }
        StringBuffer result = new StringBuffer();
        result.append(title + "\tFAILURE\r\n");
        result.append("\t" + name1 + " = " + bf.join(set1) + "\r\n");
        result.append("\t" + name2 + " = " + bf.join(set2) + "\r\n");
        // damn'd collection doesn't have a clone, so
        // we go with Set, even though that
        // may not preserve order and duplicates
         if (temp.size() != 0) {
            result.append("\t" + name2 + " & " + name1 + ":\r\n");
            result.append("\t" + bf.join(temp));
            result.append("\r\n");
        }
        temp.clear();
        temp.addAll(set1);
        temp.removeAll(set2);
        if (temp.size() != 0) {
            result.append("\t" + name1 + " - " + name2 + ":\r\n");
            result.append("\t" + bf.join(temp));
            result.append("\r\n");
        }
        temp.clear();
        temp.addAll(set2);
        temp.removeAll(set1);
        if (temp.size() != 0) {
            result.append("\t" + name2 + " - " + name1 + ":\r\n");
            result.append("\t" + bf.join(temp));
            result.append("\r\n");
        }
        return result.toString();
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/ChineseFrequency.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ChineseFrequency.java
@ -1,81 +0,0 @@
 package com.ibm.text.UCD;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.text.DecimalFormat;
 import com.ibm.icu.text.NumberFormat;
 import com.ibm.icu.text.UTF16;
 import com.ibm.text.utility.Pair;
 import com.ibm.text.utility.Utility;
 /**
  * Converts the per-character counts in "kHYPLCDPF.txt" (read from DICT_DIR) into
  * a table of characters ordered by descending total count, written to
  * "kHYPLCDPF_frequency.txt" in the same directory.
  */
 public class ChineseFrequency {
     static final String DICT_DIR = "C:\\DATA\\dict\\";
     static NumberFormat percent = new DecimalFormat("0.000000%");
     static NumberFormat percent3 = new DecimalFormat("000.000000%");
     static NumberFormat number = new DecimalFormat("#,##0");
     /** Comparator that reverses the natural ordering of Comparable objects. */
     static class InverseCompareTo implements Comparator {
         public int compare(Object o1, Object o2) {
             // Swap the operands instead of negating the result: negation is not
             // safe if compareTo ever returns Integer.MIN_VALUE.
             return ((Comparable)o2).compareTo(o1);
         }
     }
     /**
      * Reads the count file, totals the parenthesized counts on each line, and
      * writes one output row per character with its rank, share of the grand
      * total, accumulated share, hex code point and the character itself.
      *
      * Each input line is split on TAB into a hex code point and a comma-separated
      * list of entries; every entry must contain a count in parentheses.
      *
      * @throws IOException if reading or writing fails
      */
     public static void test() throws IOException{
         // Pairs of (total, code point), kept in descending order.
         Set freq_char = new TreeSet(new InverseCompareTo());
         double grandTotal = 0.0;
         BufferedReader br = BagFormatter.openUTF8Reader(DICT_DIR, "kHYPLCDPF.txt");
         // Release the reader even when a malformed line aborts the parse.
         try {
             while (true) {
                 String line = br.readLine();
                 if (line == null) break;
                 String[] pieces = Utility.split(line,'\t');
                 int cp = Integer.parseInt(pieces[0],16);
                 String[] says = Utility.split(pieces[1],',');
                 long total = 0;
                 for (int i = 0; i < says.length; ++i) {
                     int start = says[i].indexOf('(');
                     int end = says[i].indexOf(')');
                     long count = Long.parseLong(says[i].substring(start+1, end));
                     total += count;
                 }
                 grandTotal += total;
                 freq_char.add(new Pair(new Long(total), new Integer(cp)));
             }
         } finally {
             br.close();
         }
         PrintWriter pw = BagFormatter.openUTF8Writer(DICT_DIR,"kHYPLCDPF_frequency.txt");
         // Likewise make sure the output file is closed on every path.
         try {
             pw.write("\uFEFF");
             pw.println("No.\tPercentage\tAccummulated\tHex\tChar");
             Iterator it = freq_char.iterator();
             int counter = 0;
             double cummulativePercentage = 0;
             while (it.hasNext()) {
                 Pair item = (Pair)it.next();
                 Long total = (Long) item.first;
                 Integer cp = (Integer) item.second;
                 double current = total.longValue();
                 double percentage = current / grandTotal;
                 cummulativePercentage += percentage;
                 pw.println(
                     ++counter
                     + "\t" + percent.format(percentage)
                     + "\t" + percent3.format(cummulativePercentage)
                     + "\t" + Integer.toHexString(cp.intValue()).toUpperCase()
                     + "\t" + UTF16.valueOf(cp.intValue()));
             }
             //pw.println("Grand total: " + (long)grandTotal);
         } finally {
             pw.close();
         }
     }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/CodePointProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/CodePointProperty.java
@ -1,106 +0,0 @@
 package com.ibm.text.UCD;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.text.utility.*;
 import java.util.*;
 // Enumerated properties will be IntCodePointProperty.
 // The string values they return will be the property value names.
 // Binary properties are Enumerated properties. They return 0 or 1
 /**
  * Sketch of a generic code point property API. Most static methods here are
  * placeholders that return null or do nothing.
  */
 public abstract class CodePointProperty {
     /** Styles for names and string values. */
     static final byte SHORT = 0;
     static final byte DEFAULT = 1;
     static final byte LONG = 2;
     static final byte NORMAL_LIMIT = 3;

     /** Gets the property name in the requested style. */
     public abstract String getName(byte style);

     /**
      * Gets the value for a code point. The value may also be numeric, etc.,
      * but this returns the string equivalent.
      */
     public abstract String getValue(int codePoint, byte style);

     /**
      * Returns true if the code point has the value.
      * Works with any style that getValue takes.
      */
     public abstract boolean hasValue(int codePoint, String value);

     /**
      * Returns the set of all code points with that value. Same effect as using
      * hasValue one by one, but intended to have a faster internal implementation.
      */
     public abstract UnicodeSet getSet(String value);

     /**
      * Returns a list of all possible values. Logically the same as looping from
      * 0..10FFFF with getValue and getStyleLimit and throwing out duplicates, but
      * intended to be much faster. Placeholder: currently returns null.
      */
     static Iterator getAllValues(byte style) {
         return null;
     }

     /** Gets the top value style available for this property. */
     public byte getStyleLimit(byte style) {
         return NORMAL_LIMIT;
     }

     /**
      * Returns true if the value is known to be uniform over a general category.
      * Meant for various optimizations, especially for Cn and Co.
      */
     public boolean isUniformOverCategory(byte generalCategory) {
         return false;
     }

     // ---- subclasses ----

     /** Property whose values are ints. */
     public abstract static class IntCodePointProperty extends CodePointProperty {
         abstract int getNumericValue(int codePoint);
         abstract int getMaxValue();
         abstract int getMinValue();
         /** Placeholder: currently returns null. */
         static Iterator getAllNumericValues() {
             return null;
         }
     }

     /** Property whose values are doubles. */
     public abstract static class DoubleCodePointProperty extends CodePointProperty {
         abstract double getNumericValue(int codePoint);
         abstract double getMaxValue();
         abstract double getMinValue();
         /** Placeholder: currently returns null. */
         static Iterator getAllNumericValues() {
             return null;
         }
     }

     // ---- registration and lookup ----

     /** Registers a new property. Placeholder: currently does nothing. */
     static void register(CodePointProperty newProp) {
         //...
     }

     /** Finds a registered property by name. Placeholder: currently returns null. */
     static CodePointProperty getInstance(String name) {
         return null;
     }

     /** Returns a list of all registered properties. Placeholder: currently returns null. */
     static Iterator getAllRegistered() {
         return null;
     }

     // UnicodeSet would use these internally to handle properties. That is, when
     // it encountered ... [:name=value:] ...
     // it would do:
     //        CodePointProperty x = getInstance(name);
     //        if (x == null) doError(name, value);
     //        UnicodeSet s = x.getSet(value);
     // and then use s.

     // Open issue: we could have a property like: contains("dot").
     // In that case, we would register "contains" as the 'base' name,
     // but allow lookup with string parameters ("dot").
     // Maybe just adding the three methods below; that way we could have
     // [[:letter:]&[:contains(dot):]]

     public boolean hasParameters() {
         return false;
     }

     public void setParameters(String parameters) {}

     public String getParameters() {
         return null;
     }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/CompactName.java
+++ b/tools/unicodetools/com/ibm/text/UCD/CompactName.java
@ -1,273 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompactName.java,v $
 * $Date: 2001/08/31 00:30:17 $
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.IOException;
 import java.util.*;
 import java.io.*;
 import java.text.*;
 /**
  * Experimental token-based compaction of names.
  *
  * A token is an int. "Literal" tokens have bit 0x8000 set and pack up to three
  * characters of the compact alphabet (A-Z, '-', '>', '<', '*') in 5 bits each.
  * All other tokens are indexes into tokenList, where each entry packs a lead
  * token in the upper 16 bits and a trail token in the lower 16 bits.
  * Entries created at or after the index recorded by startLines() are expanded
  * with a space between lead and trail.
  *
  * Characters outside the compact alphabet (below 128) map to code 0 and are
  * therefore not preserved by literal tokens; characters at or above 128 are not
  * supported by compactMap.
  */
 public class CompactName {
     static final boolean DEBUG = false;
     public static void main(String[] args) throws IOException {
         int test = tokenFromString("ABZ");
         String ss = stringFromToken(test);
         System.out.println(ss);
         CompactName.addWord("ABSOLUTEISM");
         for (int i = 0; i < CompactName.lastToken; ++i) {
             String s = CompactName.stringFromToken(i);
             System.out.println(s);
         }
     }
     /** Maps a character (below 128) to its 5-bit compact code; 0 means unmapped. */
     static final char[] compactMap = new char[128];
     /** Inverse of compactMap: compact code to character; 0 means no character. */
     static final char[] compactUnmap = new char[128];
     static {
         char counter = 0;
         compactMap[0] = counter++;
         for (int i = 'A'; i <= 'Z'; ++i) {
             compactMap[i] = counter++;
         }
         compactMap['-'] = counter++;
         compactMap['>'] = counter++;
         compactMap['<'] = counter++;
         compactMap['*'] = counter++;
         compactUnmap[0] = 0;
         for (char i = 0; i < compactUnmap.length; ++i) {
             int x = compactMap[i];
             if (x != 0) compactUnmap[x] = i;
         }
     }
     /*
     static String expand(String s) {
         StringBuffer result = new StringBuffer();
         for (int i = 0; i < s.length(); ++i) {
             int m = s.charAt(i);
             if (m == 31 && i < s.length() + 1) {
                 m = 31 + s.charAt(++i);
             }
             result.append(compactUnmap[m]);
         }
         return result.toString();
     }
     static String compact(String s) {
         StringBuffer result = new StringBuffer();
         for (int i = 0; i < s.length(); ++i) {
             int m = compactMap[s.charAt(i)];
             if (m >= 31) {
                 result.append((char)31);
                 m -= 31;
             }
             result.append(m);
         }
         return result.toString();
     }
     */
     /** String to Integer token index, for strings added via addTokenForString. */
     static Map string_token = new HashMap();
     static Map token_string = new HashMap();
     /** Lead/trail pairs, indexed by non-literal token. */
     static int[] tokenList = new int[40000];
     static final int tokenStart = 0;
     /** Number of entries used in tokenList, i.e. the next index to assign. */
     static int lastToken = 0;
     /** Tokens at or above this index are expanded with a space between halves. */
     static int spacedMinimum = Integer.MAX_VALUE;
     /**
      * Returns true when bit 0x8000 of the token is set. Note that this is also
      * true for -1, the "not found" result of tokenFromString.
      */
     static boolean isLiteral(int i) {
         return (i & 0x8000) != 0;
     }
     /**
      * Appends a new lead/trail entry to tokenList and registers it for s.
      *
      * @return the index of the new entry
      * @throws IllegalArgumentException if s already has a registered token
      */
     static int addTokenForString(String s, int lead, int trail) {
         Object in = string_token.get(s);
         if (in != null) throw new IllegalArgumentException();
         int value = (lead << 16) + (trail & 0xFFFF);
         int result = lastToken;
         tokenList[lastToken++] = value;
         if (DEBUG) {
             System.out.println("'" + s + "', tokenList[" + result + "] = lead: " + lead + ", trail: " + trail);
             String roundTrip = stringFromToken(result);
             if (!roundTrip.equals(s)) {
                 System.out.println("\t*** No Round Trip: '" + roundTrip + "'");
             }
         }
         string_token.put(s, new Integer(result));
         return result;
     }
     /**
      * Expands a token back into its string.
      *
      * @throws IllegalArgumentException if i is a non-literal token that does not
      *         refer to an assigned tokenList entry
      */
     static String stringFromToken(int i) {
         String result;
         if (isLiteral(i)) {
             char first = compactUnmap[(i >> 10) & 0x1F];
             char second = compactUnmap[(i >> 5) & 0x1F];
             char third = compactUnmap[i & 0x1F];
             result = String.valueOf(first);
             if (second != 0) result += String.valueOf(second);
             if (third != 0) result += String.valueOf(third);
         } else if (i < 0 || i >= lastToken) {
             // Valid indexes are 0..lastToken-1. The slot at lastToken is still
             // unassigned (zero); accepting it would expand to garbage, or recurse
             // without end when the table is empty.
             throw new IllegalArgumentException("bad token: " + i);
         } else {
             int value = tokenList[i];
             int lead = value >>> 16;
             int trail = value & 0xFFFF;
             if (i >= spacedMinimum) result = stringFromToken(lead) + ' ' + stringFromToken(trail);
             else result = stringFromToken(lead) + stringFromToken(trail);
         }
         if (DEBUG) System.out.println("token: " + i + " => '" + result + "'");
         return result;
     }
     /**
      * Returns the token for s: a literal token when s has at most three
      * characters (s must not be empty), otherwise the registered token index,
      * or -1 when s has not been registered.
      */
     static int tokenFromString(String s) {
         if (s.length() <= 3) {
             int first = compactMap[s.charAt(0)];
             int second = compactMap[s.length() > 1 ? s.charAt(1) : 0];
             int third = compactMap[s.length() > 2 ? s.charAt(2) : 0];
             return 0x8000 + (first << 10) + (second << 5) + third;
         }
         Object in = string_token.get(s);
         if (in == null) return -1;
         return ((Integer)in).intValue();
     }
     /**
      * Returns the token for the word s, creating table entries as needed by
      * splitting s into a lead and a trail part.
      */
     static int addWord(String s) {
         int result = tokenFromString(s);
         if (result != -1) return result;
         int bestLen = 0;
         int best_i = 0;
         int limit = s.length() - 1;
         for (int i = limit; i >= 1; --i) {
             String firstPart = s.substring(0, i);
             String lastPart = s.substring(i);
             int lead = tokenFromString(firstPart);
             int trail = tokenFromString(lastPart);
             if (lead >= 0 && trail >= 0) { // if both match, return immediately with pair
                 if (DEBUG) show(s, firstPart, lastPart, "MATCH BOTH");
                 return addTokenForString(s, lead, trail);
             }
             // !isLiteral is only true for a part already registered in the table:
             // literals and the -1 "not found" value both have bit 0x8000 set.
             if (!isLiteral(lead)) {
                 if (i > bestLen) {
                     bestLen = i;
                     best_i = i;
                 }
             }
             if (!isLiteral(trail)) {
                 int end_i = s.length() - i;
                 if (end_i > bestLen) {
                     bestLen = end_i;
                     best_i = i;
                 }
             }
         }
         if (bestLen > 0) { // if one matches, recurse -- and return pair
             String firstPart = s.substring(0, best_i);
             String lastPart = s.substring(best_i);
             int lead = tokenFromString(firstPart);
             int trail = tokenFromString(lastPart);
             if (lead >= 0) {
                 if (DEBUG) show(s, firstPart, lastPart, "MATCH FIRST");
                 return addTokenForString(s, lead, addWord(lastPart));
             } else {
                 if (DEBUG) show(s, firstPart, lastPart, "MATCH SECOND");
                 return addTokenForString(s, addWord(firstPart), trail);
             }
         }
         // break at multiple of 3
         best_i = ((s.length() + 1) / 6) * 3;
         String firstPart = s.substring(0, best_i);
         String lastPart = s.substring(best_i);
         if (DEBUG) show(s, firstPart, lastPart, "Fallback");
         return addTokenForString(s, addWord(firstPart), addWord(lastPart));
     }
     /** Debug helper: prints how s was split. */
     static void show(String s, String firstPart, String lastPart, String comment) {
         System.out.println((s) + " => '" + (firstPart)
             + "' # '" + (lastPart) + "' " + comment);
     }
     /** Marks the current end of the table: entries added from now on expand with a space. */
     static void startLines() {
         spacedMinimum = lastToken;
     }
     /**
      * Returns the token for the space-separated line s, splitting only at
      * spaces and creating table entries as needed.
      *
      * @throws IllegalArgumentException if s has no token and no usable space
      *         to split at
      */
     static int addLine(String s) {
         int result = tokenFromString(s);
         if (result != -1) return result;
         int bestLen = 0;
         int best_i = 0;
         int limit = s.length() - 2;
         for (int i = limit; i >= 1; --i) {
             char c = s.charAt(i);
             if (c != ' ') continue;
             String firstPart = s.substring(0, i);
             String lastPart = s.substring(i+1);
             int lead = tokenFromString(firstPart);
             int trail = tokenFromString(lastPart);
             if (lead >= 0 && trail >= 0) { // if both match, return immediately with pair
                 if (DEBUG) show(s, firstPart, lastPart, "MATCH BOTH");
                 return addTokenForString(s, lead, trail);
             }
             if (i > bestLen) {
                 bestLen = i;
                 best_i = i;
             }
             int end_i = s.length() - i - 1;
             if (end_i > bestLen) {
                 bestLen = end_i;
                 best_i = i;
             }
         }
         if (bestLen > 0) { // if one matches, recurse -- and return pair
             String firstPart = s.substring(0, best_i);
             String lastPart = s.substring(best_i + 1);
             int lead = tokenFromString(firstPart);
             int trail = tokenFromString(lastPart);
             if (lead >= 0) {
                 if (DEBUG) show(s, firstPart, lastPart, "MATCH FIRST");
                 return addTokenForString(s, lead, addLine(lastPart));
             } else {
                 if (DEBUG) show(s, firstPart, lastPart, "MATCH SECOND");
                 return addTokenForString(s, addLine(firstPart), trail);
             }
         }
         System.out.println("SHOULD HAVE MATCHED!!");
         throw new IllegalArgumentException("SHOULD HAVE MATCHED!! " + s);
     }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/Compare14652.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Compare14652.java
@ -1,387 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Compare14652.java,v $
 * $Date: 2004/02/07 01:01:16 $
 * $Revision: 1.3 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 // quick and dirty function for grabbing contents of ISO 14652 file
 public class Compare14652 implements UCD_Types {
    static final boolean oldVersion = false;
    public static UnicodeSet getSet(int prop, byte propValue) {
        return UnifiedBinaryProperty.make(prop | propValue).getSet();
    }
     // Property-based sets used as the "guess" for each ISO 14652 class.
     // The initializers run in declaration order and later ones read earlier ones,
     // so the order of these declarations matters.
     // NOTE(review): several initializers call addAll/removeAll directly on the set
     // returned by getSet(); if getSet() hands back a shared or cached instance this
     // would modify it -- confirm against UnifiedBinaryProperty.
     static UnicodeSet
         titleSet = getSet(CATEGORY, Lt),
         // gc=M: Mc + Me + Mn
         combiningSet = getSet(CATEGORY, Mc)
             .addAll(getSet(CATEGORY, Me))
             .addAll(getSet(CATEGORY, Mn)),
         // gc=Z: Zs + Zl + Zp
         zSet = getSet(CATEGORY, Zs)
             .addAll(getSet(CATEGORY, Zl))
             .addAll(getSet(CATEGORY, Zp)),
         // gc=P: all punctuation subcategories
         pSet = getSet(CATEGORY, Pd)
             .addAll(getSet(CATEGORY, Ps))
             .addAll(getSet(CATEGORY, Pe))
             .addAll(getSet(CATEGORY, Pc))
             .addAll(getSet(CATEGORY, Po))
             .addAll(getSet(CATEGORY, Pi))
             .addAll(getSet(CATEGORY, Pf)),
         // gc=S: all symbol subcategories
         sSet = getSet(CATEGORY, Sm)
             .addAll(getSet(CATEGORY, Sc))
             .addAll(getSet(CATEGORY, Sk))
             .addAll(getSet(CATEGORY, So)),
         noSet = getSet(CATEGORY, No),
         csSet = getSet(CATEGORY, Cs),
         cfSet = getSet(CATEGORY, Cf),
         cnSet = getSet(CATEGORY, Cn),
         circled = getSet(DECOMPOSITION_TYPE, COMPAT_CIRCLE),
         whitespaceSet = getSet(BINARY_PROPERTIES, White_space),
         alphaSet = getSet(DERIVED, PropAlphabetic).addAll(combiningSet),
         lowerSet = getSet(DERIVED, PropLowercase).addAll(titleSet).removeAll(circled),
         upperSet = getSet(DERIVED, PropUppercase).addAll(titleSet).removeAll(circled),
         digitSet = getSet(CATEGORY, Nd),
         // ASCII and fullwidth a-f / A-F, plus all decimal digits
         xdigitSet = new UnicodeSet("[a-fA-F\uFF21-\uFF26\uFF41-\uFF46]").addAll(digitSet),
         // Falls back to gc=Z when the White_space set is empty; note this is an
         // alias of zSet or whitespaceSet, not a copy.
         spaceSet = whitespaceSet.size() == 0 ? zSet : whitespaceSet,
         controlSet = getSet(CATEGORY, Cc),
         punctSet = new UnicodeSet(pSet).addAll(sSet),
         // Everything except Cc, Cs, Cn and Z (the Cf removal is disabled below).
         graphSet = new UnicodeSet(0,0x10ffff)
             .removeAll(controlSet)
             //.removeAll(getSet(CATEGORY, Cf))
             .removeAll(csSet)
             .removeAll(cnSet)
             .removeAll(zSet),
         // spaceSet minus U+000A..U+000D, U+0085, Zl and Zp
         blankSet = new UnicodeSet(spaceSet).removeAll(new UnicodeSet("[\\u000A-\\u000D\\u0085]"))
             .removeAll(getSet(CATEGORY, Zl))
             .removeAll(getSet(CATEGORY, Zp));
    static class Prop {
        String name;
        UnicodeSet contents = new UnicodeSet();
        String guess = "???";
        UnicodeSet guessContents = new UnicodeSet();
        String wsname = whitespaceSet.size() == 0 ? "gc=Z" : "Whitespace";
        Prop(String name) {
            this.name = name;
            if (name.equals("alpha")) {
                guess = "Alphabetic + gc=M";
                guessContents = alphaSet;
            } else if (name.equals("lower")) {
                guess = "Lowercase + gc=Lt - dt=circle";
                guessContents = lowerSet;
            } else if (name.equals("upper")) {
                guess = "Uppercase + gc=Lt - dt=circle";
                guessContents = upperSet;
            } else if (name.equals("digit")) {
                guess = "gc=Nd";
                guessContents = digitSet;
            } else if (name.equals("xdigit")) {
                guess = "gc=Nd+a..f (upper/lower,normal/fullwidth)";
                guessContents = xdigitSet;
            } else if (name.equals("space")) {
                guess = wsname;
                guessContents = spaceSet;
                //Utility.showSetNames("Whitespace", spaceSet, true, Default.ucd);
            } else if (name.equals("cntrl")) {
                guess = "gc=Cc";
                guessContents = controlSet;
            } else if (name.equals("punct")) {
                guess = "gc=P,S";
                guessContents = punctSet;
            } else if (name.equals("graph")) {
                guess = "All - gc=Cc, Cs, Cn, or Z";
                guessContents = graphSet;
            } else if (name.equals("blank")) {
                guess = wsname + " - (LF,VT,FF,CR,NEL + gc=Zl,Zp)";
                guessContents = blankSet;
            } else if (name.equals("ISO_14652_class \"combining\"")) {
                guess = "gc=M";
                guessContents = combiningSet;
            }
 /*upper
 lower
 alpha
 digit
 outdigit
 space
 cntrl
 punct
 graph
 xdigit
 blank
 toupper
 tolower
 */
        }
        void show(PrintWriter pw) {
            if (name.equals("ISO_14652_LC_CTYPE")) return;
            if (name.equals("ISO_14652_toupper")) return;
            if (name.equals("ISO_14652_tolower")) return;
            if (name.equals("ISO_14652_outdigit")) return;
            if (name.equals("ISO_14652_outdigit")) return;
            if (name.startsWith("ISO_14652_class")) return;
            pw.println();
            pw.println("**************************************************");
            pw.println(name);
            pw.println("**************************************************");
            Utility.showSetDifferences(pw, name, contents, guess, guessContents, false, true, null, Default.ucd());
            //pw.println(props[i].contents);
        }
    }
     // Properties read from the ISO 14652 file, in file order; fixed capacity of 100.
     static Prop[] props = new Prop[100];
     // Number of entries of props that are in use.
     static int propCount = 0;
    public static void main(String[] args) throws IOException {
        String version = Default.ucd().getVersion();
        PrintWriter log = Utility.openPrintWriter("Diff14652_" + version + ".txt", Utility.UTF8_WINDOWS);
        try {
            log.write('\uFEFF');
            log.print("Version: " + version);
            if (false) {
                UnicodeSet ID = getSet(DERIVED, ID_Start).addAll(getSet(DERIVED, ID_Continue_NO_Cf));
                UnicodeSet XID = getSet(DERIVED, Mod_ID_Start).addAll(getSet(DERIVED, Mod_ID_Continue_NO_Cf));
                UnicodeSet alphanumSet = new UnicodeSet(alphaSet).addAll(digitSet).addAll(getSet(CATEGORY, Pc));
                Utility.showSetDifferences("ID", ID, "XID", XID, false, Default.ucd());
                Utility.showSetDifferences("ID", ID, "Alphabetic+Digit+Pc", alphanumSet, false, Default.ucd());
            }
            BufferedReader br = Utility.openReadFile("C:\\DATA\\ISO14652_CTYPE.txt", Utility.LATIN1);
            while (true) {
                String line = br.readLine();
                if (line == null) break;
                line = line.trim();
                if (line.length() == 0) continue;
                if (line.charAt(line.length() - 1) == '/') {
                    line = line.substring(0, line.length() - 1);
                }
                line = line.trim();
                if (line.length() == 0) continue;
                char ch = line.charAt(0);
                if (ch == '%') continue;
                if (ch == '(') continue;
                if (ch == '<') {
                    addItems(line, props[propCount-1].contents);
                } else {
                    // new property
                    System.out.println(line);
                    if (line.equals("width")) break;
                    props[propCount] = new Prop(line);
                    props[propCount].name = "ISO_14652_" + line;
                    props[propCount].contents = new UnicodeSet();
                    propCount++;
                }
            }
            for (int i = 0; i < propCount; ++i) props[i].show(log);
            log.println();
            log.println("**************************************************");
            log.println("Checking POSIX requirements for inclusion and disjointness.");
            log.println("**************************************************");
            log.println();
 /*
 alpha, digit, punct, cntrl are all disjoint
 space, cntrl, blank are pairwise disjoint with any of alpha, digit, xdigit
 alpha includes upper, lower
 graph includes alpha, digit, punct
 print includes graph
 xdigit includes digit
 */
            Prop
                alpha = getProp("ISO_14652_alpha"),
                upper = getProp("ISO_14652_upper"),
                lower = getProp("ISO_14652_lower"),
                graph = getProp("ISO_14652_graph"),
                //print = getProp("ISO_14652_print"),
                punct = getProp("ISO_14652_punct"),
                digit = getProp("ISO_14652_digit"),
                xdigit = getProp("ISO_14652_xdigit"),
                space = getProp("ISO_14652_space"),
                blank = getProp("ISO_14652_blank"),
                cntrl = getProp("ISO_14652_cntrl");
            checkDisjoint(log, new Prop[] {alpha, digit, punct, cntrl});
            Prop [] l1 = new Prop[] {space, cntrl, blank};
            Prop [] l2 = new Prop[] {alpha, digit, xdigit};
            for (int i = 0; i < l1.length; ++i) {
                for (int j = i + 1; j < l2.length; ++j) {
                    checkDisjoint(log, l1[i], l2[j]);
                }
            }
            checkIncludes(log, alpha, upper);
            checkIncludes(log, alpha, lower);
            checkIncludes(log, graph, alpha);
            checkIncludes(log, graph, digit);
            checkIncludes(log, graph, punct);
            //checkIncludes(log, print, graph);
            checkIncludes(log, xdigit, digit);
            // possibly alpha, digit, punct, cntrl, space cover the !(Cn,Cs)
            UnicodeSet trRemainder = new UnicodeSet(cnSet)
                .complement()
                .removeAll(csSet)
                .removeAll(digit.contents)
                .removeAll(punct.contents)
                .removeAll(alpha.contents)
                .removeAll(cntrl.contents)
                .removeAll(space.contents);
            Utility.showSetNames(log, "TR Remainder: ", trRemainder, false, false, Default.ucd());
            UnicodeSet propRemainder = new UnicodeSet(cnSet)
                .complement()
                .removeAll(csSet)
                //.removeAll(noSet)
                //.removeAll(cfSet)
                .removeAll(digit.guessContents)
                .removeAll(punct.guessContents)
                .removeAll(alpha.guessContents)
                .removeAll(cntrl.guessContents)
                .removeAll(space.guessContents);
            Utility.showSetNames(log, "Prop Remainder: ", propRemainder, false, false, Default.ucd());
            /*
            checkDisjoint(new Prop[] {alpha, digit, punct, cntrl});
            UnicodeSet remainder = cnSet.complement();
            UnicodeSet guessRemainder = new UnicodeSet(remainder);
            for (int i = 0; i < list.length; ++i) {
                for (int j = i + 1; j < list.length; ++j) {
                    compare(log, list[i].name, list[i].contents, list[j].name, list[j].contents);
                    compare(log, list[i].guess, list[i].guessContents, list[j].guess, list[j].guessContents);
                }
                remainder.removeAll(list[i].contents);
                guessRemainder.removeAll(list[i].guessContents);
            }
            if (remainder.size() != 0) {
                log.println();
                log.println("Incomplete (TR): " + remainder);
            }
            if (guessRemainder.size() != 0) {
                log.println();
                log.println("Incomplete (Prop): " + guessRemainder);
            }
            */
        } finally {
            log.close();
        }
    }
    /**
     * Runs the pairwise disjointness check over every unordered pair of
     * properties in {@code list}; each pair is visited exactly once.
     *
     * @param log  writer that receives a report for every overlapping pair
     * @param list properties to compare against each other
     */
    static void checkDisjoint(PrintWriter log, Prop[] list) {
        final int limit = list.length;
        for (int first = 0; first < limit; ++first) {
            int second = first + 1;
            while (second < limit) {
                checkDisjoint(log, list[first], list[second]);
                ++second;
            }
        }
    }
    /**
     * Checks a single pair of properties for disjointness twice: once using the
     * sets stored in {@code contents} (reported under {@code name}) and once
     * using the sets stored in {@code guessContents} (reported under {@code guess}).
     *
     * @param log   writer that receives a report for each failed check
     * @param prop1 first property of the pair
     * @param prop2 second property of the pair
     */
    static void checkDisjoint(PrintWriter log, Prop prop1, Prop prop2) {
        // Pass 1: the name/contents pair of each property.
        String firstName = prop1.name;
        UnicodeSet firstSet = prop1.contents;
        checkDisjoint(log, firstName, firstSet, prop2.name, prop2.contents);

        // Pass 2: the guess/guessContents pair of each property.
        String firstGuess = prop1.guess;
        UnicodeSet firstGuessSet = prop1.guessContents;
        checkDisjoint(log, firstGuess, firstGuessSet, prop2.guess, prop2.guessContents);
    }
    /**
     * Reports to {@code log} when the two sets share at least one element,
     * followed by a listing of the shared elements. Writes nothing when the
     * sets are disjoint.
     *
     * @param log   writer that receives the failure report
     * @param name  label used for {@code set} in the report
     * @param set   first set
     * @param name2 label used for {@code set2} in the report
     * @param set2  second set
     */
    static void checkDisjoint(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {
        if (!set.containsSome(set2)) {
            return; // nothing in common: the test passes silently
        }
        log.println();
        log.println("Fails test: " + name + " disjoint-with " + name2);
        // Work on a copy so neither input set is modified.
        UnicodeSet overlap = new UnicodeSet(set);
        overlap.retainAll(set2);
        Utility.showSetNames(log, "", overlap, false, false, Default.ucd());
    }
    /**
     * Checks that {@code prop1} includes {@code prop2} twice: once using the
     * sets stored in {@code contents} (reported under {@code name}) and once
     * using the sets stored in {@code guessContents} (reported under {@code guess}).
     *
     * @param log   writer that receives a report for each failed check
     * @param prop1 property expected to be the superset
     * @param prop2 property expected to be the subset
     */
    static void checkIncludes(PrintWriter log, Prop prop1, Prop prop2) {
        // Pass 1: the name/contents pair of each property.
        String outerName = prop1.name;
        UnicodeSet outerSet = prop1.contents;
        checkIncludes(log, outerName, outerSet, prop2.name, prop2.contents);

        // Pass 2: the guess/guessContents pair of each property.
        String outerGuess = prop1.guess;
        UnicodeSet outerGuessSet = prop1.guessContents;
        checkIncludes(log, outerGuess, outerGuessSet, prop2.guess, prop2.guessContents);
    }
    /**
     * Reports to {@code log} when {@code set} does not contain every element of
     * {@code set2}, followed by a listing of the elements of {@code set2} that
     * are missing from {@code set}. Writes nothing when the inclusion holds.
     *
     * @param log   writer that receives the failure report
     * @param name  label used for {@code set} in the report
     * @param set   set expected to be the superset
     * @param name2 label used for {@code set2} in the report
     * @param set2  set expected to be the subset
     */
    static void checkIncludes(PrintWriter log, String name, UnicodeSet set, String name2, UnicodeSet set2) {
        if (!set.containsAll(set2)) {
            log.println();
            // Same "Fails test: " prefix (with the space) as the disjointness report,
            // so that both kinds of failure read alike in the log.
            log.println("Fails test: " + name + " includes " + name2);
            // Copy set2 before subtracting so the caller's set is left untouched.
            UnicodeSet diff = new UnicodeSet(set2).removeAll(set);
            Utility.showSetNames(log, "", diff, false, false, Default.ucd());
        }
    }
    // Shared scratch buffer that receives the ';'-separated fields of a line.
    static String[] pieces = new String[100];

    // example: <U1F48>..<U1F4D>;<U1F59>;<U1F5B>;<U1F5D>;<U1F5F>;<U1F68>..<U1F6F>;/
    /**
     * Adds every code point or code point range listed in {@code line} to
     * {@code contents}. Fields are separated by ';'; a field is either a single
     * {@code <Uxxxx>} item or a range {@code <Uxxxx>..<Uyyyy>}. Empty fields and
     * the literal {@code <0>} are ignored.
     *
     * @param line     one line of ';'-separated items
     * @param contents set that receives the parsed code points
     * @throws IllegalArgumentException if an item is not of the {@code <U...>} form
     */
    static void addItems(String line, UnicodeSet contents) {
        // presumably Utility.split returns the number of fields it stored -- verify against Utility
        final int fieldCount = Utility.split(line, ';', pieces);
        for (int index = 0; index < fieldCount; ++index) {
            final String item = pieces[index].trim();
            if (item.length() == 0 || item.equals("<0>")) {
                continue;
            }
            final int dots = item.indexOf("..");
            final int first;
            final int last;
            if (dots < 0) {
                // single code point: a range of length one
                first = parse(item);
                last = first;
            } else {
                first = parse(item.substring(0, dots));
                last = parse(item.substring(dots + 2));
            }
            contents.add(first, last);
        }
    }
    /**
     * Converts an item of the form {@code <Uxxxx>} into the code point given by
     * its hexadecimal digits.
     *
     * @param piece text that must start with {@code <U} and end with {@code >}
     * @return the value of the hex digits between the markers
     * @throws IllegalArgumentException if the markers are missing, or (as
     *         NumberFormatException) if the enclosed text is not valid hex
     */
    static int parse(String piece) {
        final boolean wellFormed = piece.startsWith("<U") && piece.endsWith(">");
        if (!wellFormed) {
            throw new IllegalArgumentException("Bogus code point: " + piece);
        }
        // Strip the leading "<U" and the trailing ">".
        final String hexDigits = piece.substring(2, piece.length() - 1);
        return Integer.parseInt(hexDigits, 16);
    }
    /**
     * Looks up a property by exact name among the first {@code propCount}
     * entries of {@code props}.
     *
     * @param name name to search for
     * @return the first entry whose {@code name} equals the argument, or
     *         {@code null} when there is none
     */
    static Prop getProp(String name) {
        int index = 0;
        while (index < propCount) {
            final Prop candidate = props[index];
            if (candidate.name.equals(name)) {
                return candidate;
            }
            ++index;
        }
        return null; // no entry with that name
    }
    // oddities: 
        // extra space after ';' <U0300>..<U036F>; <U20D0>..<U20FF>; <UFE20>..<UFE2F>;/
        // <0>?? <0>;<U0BE7>..<U0BEF>;/
        // <U202C>; <U202D>;<U202E>; <UFEFF> : 0;/
       // % "print" is by default "graph", and the <space> character
       // print is odd, since it includes space but not other spaces.
       // alnum not defined.
 }
--- a/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java
+++ b/tools/unicodetools/com/ibm/text/UCD/CompareProperties.java
@ -1,473 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/CompareProperties.java,v $
 * $Date: 2004/02/12 08:23:15 $
 * $Revision: 1.5 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;
 import java.text.NumberFormat;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 public class CompareProperties implements UCD_Types {
 	// When true, printStatistics also lists the "IS DISJOINT WITH" relations.
 	static final boolean DO_DISJOINT = false;
 	// Shared instance, created on first use by partition() or statistics().
 	static CompareProperties me = null;
 	/**
 	 * Writes the partition report, creating the shared instance first if it
 	 * does not exist yet.
 	 *
 	 * @throws IOException if the report cannot be written
 	 */
 	static void partition() throws IOException {
 		if (me == null) {
 			me = new CompareProperties();
 		}
 		final CompareProperties shared = me;
 		shared.printPartition();
 	}
 	/**
 	 * Prints three containsAll results involving an empty UnicodeSet to
 	 * System.out, then writes the statistics report, creating the shared
 	 * instance first if it does not exist yet.
 	 *
 	 * @throws IOException if the report cannot be written
 	 */
 	static void statistics() throws IOException {
 		final UnicodeSet sample = new UnicodeSet("[abc]");
 		final UnicodeSet nothing = new UnicodeSet();

 		// Show how containsAll behaves when one or both sides are empty.
 		boolean sampleCoversNothing = sample.containsAll(nothing);
 		System.out.println(sampleCoversNothing);
 		boolean nothingCoversSample = nothing.containsAll(sample);
 		System.out.println(nothingCoversSample);
 		boolean nothingCoversNothing = nothing.containsAll(new UnicodeSet());
 		System.out.println(nothingCoversNothing);

 		if (me == null) {
 			me = new CompareProperties();
 		}
 		me.printStatistics();
 	}
 	public final class BitSetComparator implements Comparator {
 		public int compare(Object o1, Object o2) {
 			BitSet bs1 = (BitSet) o1;
 			BitSet bs2 = (BitSet) o2;
 			int count2 = bs1.size() > bs2.size() ? bs1.size() : bs2.size();
 			for (int i = 0; i < count2; ++i) {
 				if (bs1.get(i)) {
 					if (!bs2.get(i)) {
 						return 1;
 					}
 				} else if (bs2.get(i)) {
 					return -1;
 				}
 			}
 			return 0;
 		 }
 	}
 	/**
 	 * Comparator wrapper that reverses the ordering of the comparator it is
 	 * given, for getting a list in reverse order.
 	 *
 	 * @author Davis
 	 */
 	public static class InverseComparator implements Comparator {
 		// The comparator whose ordering is reversed.
 		private Comparator wrapped;

 		/**
 		 * @param other comparator whose ordering is to be reversed
 		 */
 		public InverseComparator(Comparator other) {
 			wrapped = other;
 		}

 		/**
 		 * Delegates to the wrapped comparator with the arguments swapped.
 		 */
 		public int compare(Object a, Object b) {
 			final int swapped = wrapped.compare(b, a);
 			return swapped;
 		}
 	}
 	/**
 	 * Comparator that defers to the first argument's own
 	 * {@code compareTo} method, i.e. it orders objects by their
 	 * {@link Comparable} implementation.
 	 *
 	 * @author Davis
 	 */
 	public static class MethodComparator implements Comparator {
 		/**
 		 * @param a object that must implement Comparable
 		 * @param b object passed to {@code a.compareTo}
 		 * @return whatever {@code a.compareTo(b)} returns
 		 * @throws ClassCastException if {@code a} is not Comparable
 		 */
 		public int compare(Object a, Object b) {
 			final Comparable left = (Comparable) a;
 			return left.compareTo(b);
 		}
 	}
 	public final static class UnicodeSetComparator implements Comparator {
 		/**
 		 * Compares two UnicodeSets range by range, producing a transitive ordering
 		 * based on the first code point that is in one set but not the other.
 		 * If compareTo were added to UnicodeSet, this could be optimized to use list[i].
 		 *
 		 * @param o1 first UnicodeSet
 		 * @param o2 second UnicodeSet
 		 * @return -1 if the first set contains the first differing code point,
 		 *         1 if the second set does, 0 if the ranges do not differ
 		 * @author Davis
 		 */
 		public int compare(Object o1, Object o2) {
 			final UnicodeSetIterator first = new UnicodeSetIterator((UnicodeSet) o1);
 			final UnicodeSetIterator second = new UnicodeSetIterator((UnicodeSet) o2);
 			while (first.nextRange()) {
 				if (!second.nextRange()) {
 					// first still has a range, second has run out
 					return -1;
 				}
 				if (first.codepoint != second.codepoint) {
 					// whichever range starts lower owns the first differing code point
 					return first.codepoint < second.codepoint ? -1 : 1;
 				}
 				if (first.codepointEnd != second.codepointEnd) {
 					// same start: the range that extends further owns the first differing code point
 					return first.codepointEnd < second.codepointEnd ? 1 : -1;
 				}
 			}
 			// first is exhausted; any remaining range in second makes second the owner
 			return second.nextRange() ? 1 : 0;
 		}
 	}
    // Not referenced by any of the code visible in this class.
    boolean isPartitioned = false;
 	// Properties accepted by getProperties(); only the first `count` slots are filled.
 	UCDProperty[] props = new UCDProperty[500];
 	// sets[i] collects the code points for which props[i].hasValue(cp) is true.
 	UnicodeSet[] sets = new UnicodeSet[500];
 	// Number of filled slots in props and in the parallel arrays below.
 	int count = 0;
 	// Pairwise relations filled in by printStatistics(): bit j of xxx[i] records
 	// the relation between sets[i] and sets[j].
 	BitSet[] disjoints = new BitSet[500];
 	BitSet[] contains = new BitSet[500];
 	BitSet[] isin = new BitSet[500];
 	BitSet[] equals = new BitSet[500];
 	// Maps a BitSet (which properties a code point has) to the UnicodeSet of
 	// code points with exactly that combination; see fillPropertyValues().
 	Map map = new TreeMap(new BitSetComparator());
    // Instance initializer: runs after the field initializers above, so the
    // arrays and the map already exist when these methods fill them.
    {
        getProperties();   
        fillPropertyValues();
 		Utility.fixDot();
 	}
 	/**
 	 * Walks every code point from 0 to 0x10FFFF that the UCD reports as
 	 * allocated and records, for each collected property, whether the code
 	 * point has it. Two structures are filled: {@code sets[i]} receives the code
 	 * points that have property i, and {@code map} groups code points by the
 	 * exact combination of properties they have.
 	 */
 	private void fillPropertyValues() {
 		// Reused for every code point; all bits below `count` are rewritten each pass.
 		final BitSet signature = new BitSet();
 		int allocatedSeen = 0; // tally only; nothing reads it
 		for (int cp = 0; cp <= 0x10FFFF; ++cp) {
 			Utility.dot(cp);
 			// The category is looked up but not used (the filter on it is disabled).
 			int cat = Default.ucd().getCategory(cp);
 			// if (cat == UNASSIGNED || cat == PRIVATE_USE || cat == SURROGATE) continue;
 			if (!Default.ucd().isAllocated(cp)) {
 				continue;
 			}
 			for (int slot = 0; slot < count; ++slot) {
 				final boolean hasValue = props[slot].hasValue(cp);
 				signature.set(slot, hasValue);
 				if (hasValue) {
 					sets[slot].add(cp);
 				}
 			}
 			++allocatedSeen;
 			UnicodeSet bucket = (UnicodeSet) map.get(signature);
 			if (bucket == null) {
 				bucket = new UnicodeSet();
 				// Store a copy as the key, because `signature` keeps changing.
 				map.put(signature.clone(), bucket);
 			}
 			bucket.add(cp);
 		}
 	}
 	/**
 	 * Collects the properties to compare. Every id below LIMIT_ENUM is tried;
 	 * ids of type AGE, JOINING_GROUP or COMBINING_CLASS are skipped silently,
 	 * other rejected properties are reported on System.out with the reason.
 	 * For each accepted property the parallel arrays get a fresh, empty entry
 	 * and {@code count} is advanced.
 	 */
 	private void getProperties() {
 		for (int id = 0; id < LIMIT_ENUM; ++id) { //   || iType == SCRIPT
 			final int type = id & 0xFF00;
 			if (type == AGE || type == JOINING_GROUP || type == COMBINING_CLASS) {
 				continue;
 			}
 			if (id == 0x0900) {
 				System.out.println("debug");
 			}
 			final UCDProperty up = UnifiedBinaryProperty.make(id, Default.ucd());
 			if (up == null) {
 				continue;
 			}

 			// Find the first reason (if any) for rejecting this property.
 			String skipped = null;
 			if (up.getValueType() < BINARY_PROP) {
 				skipped = up.getName() + "; value varies";
 			} else if (!up.isStandard()) {
 				skipped = getPropName(up) + "; not standard";
 			} else if (up.getName(LONG).startsWith("Other_")) {
 				skipped = getPropName(up) + "; contributory";
 			} else if (up.isDefaultValue() || up.skipInDerivedListing()) {
 				skipped = getPropName(up) + "; default value";
 			}
 			if (skipped != null) {
 				System.out.println("\tSkipping " + skipped);
 				continue;
 			}

 			// Accepted: allocate the per-property bookkeeping, then register it.
 			final int slot = count;
 			sets[slot] = new UnicodeSet();
 			disjoints[slot] = new BitSet();
 			equals[slot] = new BitSet();
 			contains[slot] = new BitSet();
 			isin[slot] = new BitSet();
 			props[slot] = up;
 			count = slot + 1;
 			System.out.println(Utility.hex(id) + " " + slot + " " + getPropName(slot));
 		}
 		System.out.println("props: " + count);
 	}
 	/**
 	 * Writes the "Partition" report: for every distinct combination of
 	 * properties found in {@code map}, the set of code points having exactly
 	 * that combination, its size, and the names of the properties involved.
 	 * The number of combinations is written at the end.
 	 *
 	 * @throws IOException if the output file cannot be opened
 	 */
 	public void printPartition() throws IOException {
 		System.out.println("Set Size: " + map.size());
 		PrintWriter output = Utility.openPrintWriter("Partition"
 			 + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS);
 		// Close the writer even if something below throws, so the file handle
 		// is not leaked and buffered output is not lost.
 		try {
 			Iterator it = map.keySet().iterator();
 			while (it.hasNext()) {
 				BitSet probe2 = (BitSet) it.next();
 				UnicodeSet value = (UnicodeSet) map.get(probe2);
 				output.println();
 				output.println(value);
 				output.println("Size: " + value.size());
 				// List the names of the properties whose bit is set in this key.
 				for (int i = 0; i < count; ++i) {
 					if (!probe2.get(i)) continue;
 					output.print(" " + getPropName(i));
 				}
 				output.println();
 			}
 			output.println("Count: " + map.keySet().size());
 		} finally {
 			output.close();
 		}
 	}
 	// Percent formatter (English locale); used by showDiff to print ratios.
 	static final NumberFormat percent = NumberFormat.getPercentInstance(Locale.ENGLISH);
 	/**
 	 * Writes the "Statistics" report. First every ordered pair of collected
 	 * property sets is classified as disjoint, equal, containing, or contained
 	 * (pairs that merely overlap are not recorded). Then, in alphabetical order
 	 * of property name, the recorded relations of each property are printed.
 	 * Properties with no EQUALS / CONTAINS / IS CONTAINED IN relation are
 	 * listed at the end under "NONE OF THE ABOVE".
 	 *
 	 * @throws IOException if the output file cannot be opened
 	 */
 	public void printStatistics() throws IOException {
 		System.out.println("Set Size: " + map.size());
 		PrintWriter output = Utility.openPrintWriter("Statistics"
 			 + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_WINDOWS);
 		// Close the writer even if something below throws, so the file handle
 		// is not leaked and buffered output is not lost.
 		try {
 			System.out.println("Finding disjoints/contains");
 			for (int i = 0; i < count; ++i) {
 				System.out.println(getPropName(i));
 				for (int j = 0; j < count; ++j) {
 					if (j == i) continue;
 					if (i == 1 && j == 2) {
 						System.out.println("debug");
 					}
 					// The tests are ordered so that each pair lands in at most one relation.
 					if (sets[i].containsNone(sets[j])) {
 						disjoints[i].set(j);
 					} else if (sets[i].equals(sets[j])) {
 						equals[i].set(j);
 					} else if (sets[i].containsAll(sets[j])) {
 						contains[i].set(j);
 					} else if (sets[j].containsAll(sets[i])) {
 						isin[i].set(j);
 					}
 				}
 			}
 			System.out.println("Removing non-maximal sets");
 			// a set is non-maximal if it is contained in one of the other sets
 			// so remove anything that is contained in one of the items
 			// NOTE: this step is disabled. As written, tempContains[i] is the same
 			// object as contains[i] (a cast, not a copy), which would need to be
 			// addressed before enabling it.
 			if (false) {
 				BitSet[] tempContains = new BitSet[count];
 				for (int i = 0; i < count; ++i) {
 					System.out.println(getPropName(i));
 					tempContains[i] = (BitSet) contains[i]; // worry about collisions
 					BitSet b = contains[i];
 					for (int j = 0; j < b.size(); ++j) {
 						if (b.get(j)) tempContains[i].andNot(contains[j]);
 					}
 					b = disjoints[i];	// don't worry
 					for (int j = 0; j < b.size(); ++j) {
 						if (b.get(j)) b.andNot(contains[j]);
 					}
 				}
 				for (int i = 0; i < count; ++i) {
 					contains[i] = tempContains[i];
 				}
 			}
 			System.out.println("Printing disjoints & contains");
 			List remainder = new ArrayList();
 			// Sorted by property name so the report is in alphabetical order.
 			Map m = new TreeMap(); // new UnicodeSetComparator()
 			for (int i = 0; i < count; ++i) {
 				m.put(getPropName(i), new Integer(i)); // sets[i]
 			}
 			Iterator it = m.keySet().iterator();
 			while (it.hasNext()) {
 				Object key = it.next();
 				int index = ((Integer)m.get(key)).intValue();
 				// haveName tracks whether the property's heading has been printed yet.
 				boolean haveName = printBitSet(output, index, "EQUALS: ", equals[index], false);
 				haveName = printBitSet(output, index, "CONTAINS: ", contains[index], haveName);
 				haveName = printBitSet(output, index, "IS CONTAINED IN: ", isin[index], haveName);
 				if (DO_DISJOINT) {
 					printBitSet(output, index, "IS DISJOINT WITH: ", disjoints[index], haveName);
 				}
 				if (!haveName) remainder.add(getPropName(index));
 			}
 			it = remainder.iterator();
 			output.println();
 			output.print("NONE OF THE ABOVE: ");
 			boolean first = true;
 			while (it.hasNext()) {
 				Object key = it.next();
 				if (!first) output.print(", ");
 				first = false;
 				output.print(key);
 			}
 			output.println();
 		} finally {
 			output.close();
 		}
 	}
    /**
     * Prints one relation line for a property: the title followed by the
     * alphabetically sorted, comma-separated names of the properties whose bit
     * is set in {@code b}. If the property's own name has not been printed yet,
     * it is printed first as a heading. Nothing is written when {@code b} is empty.
     *
     * @param output   report writer
     * @param index    index of the property the line belongs to
     * @param title    label that starts the line
     * @param b        bits selecting the related properties
     * @param haveName whether the heading for this property was already printed
     * @return true if the heading has been printed (before or by this call)
     */
    private boolean printBitSet(PrintWriter output, int index, String title, BitSet b, boolean haveName) {
        if (b.isEmpty()) {
            return haveName;
        }
        if (!haveName) {
            output.println();
            output.println(getPropName(index));
        }
        output.print(title);

        // TreeSet puts the names in sorted order.
        Set names = new TreeSet();
        for (int j = b.nextSetBit(0); j >= 0; j = b.nextSetBit(j + 1)) {
            names.add(getPropName(j));
        }

        String separator = "";
        for (Iterator it = names.iterator(); it.hasNext(); ) {
            output.print(separator);
            output.print(it.next());
            separator = ", ";
        }
        output.println();
        output.flush();
        return true;
    }
 	/* 
 			UnicodeSet a_b = new UnicodeSet();
 			UnicodeSet ab = new UnicodeSet();
 			UnicodeSet _ab = new UnicodeSet();
 	 */
 	/*
 	a_b.set(sets[i]).removeAll(sets[j]);
 	ab.set(sets[i]).retainAll(sets[j]);
 	_ab.set(sets[j]).removeAll(sets[i]);
 	// we are interested in cases where a contains b or is contained by b
 	// contain = _ab = 0
 	// is contained == a_b = 0
 	// is disjoint == ab == 0
 	// is equal == contains & iscontained
 	double total = a_b.size() + ab.size() + _ab.size();
 	double limit = total*0.03;
 	boolean gotName = showDiff(output, "C", j, a_b, total, limit, false);
 	gotName = showDiff(output, "D", j, ab, total, limit, gotName);
 	gotName = showDiff(output, "S", j, _ab, total, limit, gotName);
 	if (gotName) output.println();
 	*/
 	/**
 	 * Prints the size of {@code a_b} as a percentage of {@code total}, prefixed
 	 * by {@code title}, but only when the set is non-empty and smaller than
 	 * {@code limit}. The property name is printed first if it has not been
 	 * printed on this line yet.
 	 *
 	 * @param output    report writer
 	 * @param title     label printed directly before the percentage
 	 * @param propIndex index of the property whose name heads the line
 	 * @param a_b       set whose size is reported
 	 * @param total     denominator for the percentage
 	 * @param limit     exclusive upper bound on the size for it to be reported
 	 * @param gotName   whether the property name was already printed
 	 * @return true if the property name has been printed (before or by this call)
 	 */
 	private boolean showDiff(PrintWriter output, String title, int propIndex, UnicodeSet a_b, 
 			double total, double limit, boolean gotName) {
 		final int size = a_b.size();
 		final boolean reportable = 0 < size && size < limit;
 		if (!reportable) {
 			return gotName;
 		}
 		if (!gotName) {
 			output.print("\t" + getPropName(propIndex));
 		}
 		output.print("\t" + title + percent.format(size/total));
 		return true;
 	}
 	/**
 	 * Display name of the collected property stored at the given index of
 	 * {@code props}.
 	 */
 	private String getPropName(int propertyIndex) {
 		final UCDProperty property = props[propertyIndex];
 		return getPropName(property);
 	}

 	/**
 	 * Display name of a property: its long full name passed through
 	 * {@code Utility.getUnskeleton}.
 	 */
 	private String getPropName(UCDProperty ubp) {
 		final String longName = ubp.getFullName(LONG);
 		return Utility.getUnskeleton(longName, true);
 	}
    /**
     * Writes the "PropertyDifferences" report: one tab-separated line per pair
     * of standard, binary-valued properties, giving their relationship
     * (DISJOINT, EQUALS, CONTAINS or OVERLAPS) and the number of code points in
     * the intersection and in each difference. Pairs where either property has
     * no code points are omitted. Progress is echoed to System.out.
     *
     * @throws IOException if the output file cannot be opened
     */
    public static void listDifferences() throws IOException {
        PrintWriter output = Utility.openPrintWriter("PropertyDifferences" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
        // Close the writer even if something below throws, so the file handle
        // is not leaked and buffered output is not lost.
        try {
            output.println("# Listing of relationships among properties, suitable for analysis by spreadsheet");
            output.println("# Generated for " + Default.ucd().getVersion());
            output.println(UnicodeDataFile.generateDateLine());
            output.println("# P1	P2	R(P1,P2)	C(P1&P2)	C(P1-P2)	C(P2-P1)");
            for (int i = 1; i < UCD_Types.LIMIT_ENUM; ++i) {
                int iType = i & 0xFF00;
                if (iType == UCD_Types.JOINING_GROUP || iType == UCD_Types.AGE || iType == UCD_Types.COMBINING_CLASS || iType == UCD_Types.SCRIPT) continue;
                UCDProperty upi = UnifiedBinaryProperty.make(i, Default.ucd());
                if (upi == null) continue;
                if (!upi.isStandard()) {
                    System.out.println("Skipping " + upi.getName() + "; not standard");
                    continue;
                }
                if (upi.getValueType() < UCD_Types.BINARY_PROP) {
                    System.out.println("Skipping " + upi.getName() + "; value varies");
                    continue;
                }
                String iNameShort = upi.getFullName(UCD_Types.SHORT);
                String iNameLong = upi.getFullName(UCD_Types.LONG);
                System.out.println();
                System.out.println();
                System.out.println(iNameLong);
                output.println("#" + iNameLong);
                // High byte of the last j for which a group heading was printed.
                int last = -1;
                // Only j > i is visited, so each unordered pair is reported once.
                for (int j = i+1; j < UCD_Types.LIMIT_ENUM; ++j) {
                    int jType = j & 0xFF00;
                    // Besides the excluded types, skip values of the same property as i,
                    // except within the binary properties.
                    if (jType == UCD_Types.JOINING_GROUP || jType == UCD_Types.AGE || jType == UCD_Types.COMBINING_CLASS || jType == UCD_Types.SCRIPT
                        || (jType == iType && jType != UCD_Types.BINARY_PROPERTIES)) continue;
                    UCDProperty upj = UnifiedBinaryProperty.make(j, Default.ucd());
                    if (upj == null) continue;
                    if (!upj.isStandard()) continue;
                    if (upj.getValueType() < UCD_Types.BINARY_PROP) continue;
                    if ((j >> 8) != last) {
                        last = j >> 8;
                        System.out.println();
                        System.out.print("\t" + UCD_Names.SHORT_UNIFIED_PROPERTIES[last]);
                        output.flush();
                        output.println("#\t" + UCD_Names.SHORT_UNIFIED_PROPERTIES[last]);
                    } else {
                        System.out.print('.');
                    }
                    System.out.flush();
                    // Tally, over all considered code points, how the two properties overlap.
                    int bothCount = 0, i_jPropCount = 0, j_iPropCount = 0, iCount = 0, jCount = 0;
                    for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                        int cat = Default.ucd().getCategory(cp);
                        if (cat == UCD_Types.UNASSIGNED || cat == UCD_Types.PRIVATE_USE || cat == UCD_Types.SURROGATE) continue;
                        if (!Default.ucd().isAllocated(cp)) continue;
                        boolean iProp = upi.hasValue(cp);
                        boolean jProp = upj.hasValue(cp);
                        if (jProp) ++jCount;
                        if (iProp) {
                            ++iCount;
                            if (jProp) ++bothCount;
                            else ++i_jPropCount;
                        } else if (jProp) ++j_iPropCount;
                    }
                    if (iCount == 0 || jCount == 0) continue;
                    String jNameShort = upj.getFullName(UCD_Types.SHORT);
                    //String jNameLong = ubp.getFullID(j, LONG);
                    // Both subset directions are labelled CONTAINS; the output order
                    // below puts the containing property first.
                    String rel = bothCount == 0 ? "DISJOINT"
                        : i_jPropCount == 0 && j_iPropCount == 0 ? "EQUALS"
                        : i_jPropCount == 0 ? "CONTAINS" // depends on reverse output
                        : j_iPropCount == 0 ? "CONTAINS"
                        : "OVERLAPS";
                    if (j_iPropCount > i_jPropCount) {
                        // reverse output
                        output.println(jNameShort + "\t" + iNameShort + "\t" + rel
                            + "\t" + bothCount + "\t" + j_iPropCount + "\t" + i_jPropCount);
                    } else {
                        output.println(iNameShort + "\t" + jNameShort + "\t" + rel
                            + "\t" + bothCount + "\t" + i_jPropCount + "\t" + j_iPropCount);
                    }
                }
            }
        } finally {
            output.close();
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
@ -1,908 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
 * $Date: 2006/04/05 22:12:44 $
 * $Revision: 1.18 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import com.ibm.text.utility.*;
 import java.util.*;
 import java.text.NumberFormat;
 import java.io.*;
 /** Simple program to merge UCD files into XML. Not yet documented!!
 * @author Mark Davis
 */
 public final class ConvertUCD implements UCD_Types {
    // Diagnostic switches; all are off.
    public static final boolean SHOW = false;
    public static final boolean DEBUG = false;
    static final boolean SHOW_SAMPLE = false;
    // Components of the version being converted, parsed from `version` in toJava().
    int major;
    int minor;
    int update;
    // Version string as passed to toJava().
    String version;
    // varies by version
    /*
    public static final String BASE_DIR11 = DATA_DIR + "\\Versions\\";
    public static final String BASE_DIR20 = DATA_DIR + "\\Versions\\";
    public static final String BASE_DIR21 = DATA_DIR + "\\Versions\\";
    public static final String BASE_DIR30 = DATA_DIR + "\\Update 3.0.1\\";
    public static final String BASE_DIR31 = DATA_DIR + "\\3.1-Update\\";
    */
    //public static final String blocksnamePlain = "Blocks.txt";
    //public static final String blocksname31 = "Blocks-4d2.beta";
    /** One row per input file. The first item of a row is the file name, the
     *  rest are the names of the file's fields, in order (skipping the
     *  character field).
     *  "OMIT" is special -- means don't record.
     *  A leading "*" marks a field as hex; the static initializer below strips
     *  the "*" from the label in place and records the label in isHex.
     */
    static String[][] labelList = {
        // Labels for the incoming files. Labels MUST match field order in file.
        // IMPORTANT - defaults of form y-=x must occur after x is encountered!
        // The one exception is "st", which is handled specially.
        // So file order is important.
        //*
        // 01CA;LATIN CAPITAL LETTER NJ;Lu;0; L; <compat> 004E 004A;  ;  ;  ;N ;LATIN CAPITAL LETTER N J;    ;  ;01CC;01CB
        //      n                       gc cc bc dm                 dd dv nv bm on                       cm,  uc lc   tc
        {"UnicodeData", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
        //{"ExtraProperties", "xp"},
        {"PropList", "binary"},
        //{"ExtraProperties", "xp"},
        {"EastAsianWidth", "ea", "OMIT"},
        {"LineBreak", "lb", "OMIT"},
        {"SpecialCasing", "*sl", "*st", "*su", "sc"},
        {"CompositionExclusions", "ce"},
        {"CaseFolding", "OMIT", "*fc"},
        {"ArabicShaping", "OMIT", "jt", "jg"},
        {"BidiMirroring", "*bg"},
        {"Scripts", "sn"},
        //{"Jamo", "jn"},
        //{"Scripts-1d4", "RANGE", "sn"},
        //{"Age", "*sn"},
         //*/
         /*
        //*/
    };
    // Labels whose field values are hexadecimal (label -> "").
    static HashMap isHex = new HashMap();
    // Every label mapped to its default value; currently always null.
    static HashMap defaults = new HashMap();

    // Normalizes the labels in labelList and fills isHex and defaults.
    static {
        for (int fileIndex = 0; fileIndex < labelList.length; ++fileIndex) {
            final String[] labels = labelList[fileIndex];
            // Slot 0 is the file name, so field labels start at 1.
            for (int field = 1; field < labels.length; ++field) {
                String label = labels[field];
                // A leading "*" marks a hex field; strip it and store the clean
                // label back into the table so later lookups see it without the marker.
                if (label.charAt(0) == '*') {
                    label = label.substring(1);
                    labels[field] = label;
                    isHex.put(label, "");
                }
                // Parsing of a "$" prefix (append duplicates) and of a "-default"
                // suffix is not active here, so no label has a default value.
                defaults.put(label, null);
            }
        }
    }
    /*
    static String[][] labelList31 = {
        // Labels for the incoming files. Labels MUST match field order in file.
        // IMPORTANT - defaults of form y-=x must occur after x is encountered!
        // The one exception is "st", which is handled specially.
        // So file order is important.
        //*
        // 01CA;LATIN CAPITAL LETTER NJ;Lu;0; L; <compat> 004E 004A;  ;  ;  ;N ;LATIN CAPITAL LETTER N J;    ;  ;01CC;01CB
        //      n                       gc cc bc dm                 dd dv nv bm on                       cm,  uc lc   tc
        {"UnicodeData-3.1.0d8.beta", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
        {"PropList-3.1.0d5.beta", "binary"},
        {"ExtraProperties", "xp"},
        {"EastAsianWidth-4d7.beta", "ea", "OMIT"},
        {"LineBreak-6d6.beta", "lb", "OMIT"},
        {"SpecialCasing-4d1.beta", "*sl", "*st", "*su", "sc"},
        {"CompositionExclusions-3d6.beta", "ce"},
        {"CaseFolding-3d4.beta", "OMIT", "*fc"},
        {"ArabicShaping", "OMIT", "jt", "jg"},
        {"BidiMirroring", "*bg"},
        {"Scripts-3.1.0d4.beta", "sn"},
        //{"Scripts-1d4", "RANGE", "sn"},
        //{"Age", "*sn"},
         //*/
         /*
        {"Jamo", "jn"},
        //
    };
    /*
        {"UnicodeData-3.1.0d8.beta", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
        {"ExtraProperties", "xp"},
        {"EastAsianWidth-4d7.beta", "ea", "OMIT"},
        {"LineBreak-6d6.beta", "lb", "OMIT"},
        {"SpecialCasing-4d1.beta", "*sl", "*st", "*su", "sc"},
        {"CompositionExclusions-3d6.beta", "ce"},
        {"CaseFolding-3d4.beta", "OMIT", "*fc"},
        {"PropList-3.1.0d2.beta", "PROP", "OMIT"},
        {"ArabicShaping", "OMIT", "jt", "jg"},
        {"BidiMirroring", "*bg"},
        {"Scripts-1d4", "sn"},
        //{"Scripts-1d4", "RANGE", "sn"},
        //{"Age", "*sn"},
         //*/
         /*
        {"Jamo", "jn"},
        //
    //"NamesList-3.1.0d1.beta"
    static String[][] labelList30 = {
        // Labels for the incoming files. Labels MUST match field order in file.
        // IMPORTANT - defaults of form y-=x must occur after x is encountered!
        // The one exception is "st", which is handled specially.
        // So file order is important.
        //*
        {"UnicodeData", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
        {"CompositionExclusions", "ce"},
        {"EastAsianWidth", "ea", "OMIT"},
        {"LineBreak", "lb", "OMIT"},
        {"SpecialCasing", "*sl", "*st", "*su", "sc"},
        {"CaseFolding", "OMIT", "*fc"},
        {"ArabicShaping", "OMIT", "jt", "jg"},
        {"BidiMirroring", "*bg"},
        /*
        {"Jamo", "jn"},
        {"PropList.alpha", "RANGE", "OMIT"},
        //
    };
    static String[][] labelList11 = {
        {"UnicodeData-1.1", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
    };
    static String[][] labelList20 = {
        {"UnicodeData-2.0", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
    };
    static String[][] labelList21 = {
        {"UnicodeData-2.1", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
    };
    */
    // handles
    // Name of the blocks data file, as passed to Utility.openUnicodeFile by readBlocks().
    public static final String blocksname = "Blocks";
    //public static final String[][] labelList;
    // Not referenced by any of the code visible in this part of the file.
    public static final boolean NEWPROPS = true;
    /*
    static {
        switch (major*10 + minor) {
        case 31:
            blocksname = blocksname31;
            labelList = labelList31;
            break;
        case 30:
            blocksname = blocksnamePlain;
            labelList = labelList30;
            break;
        case 21:
            blocksname = blocksnamePlain;
            labelList = labelList21;
            break;
        case 20:
            blocksname = blocksnamePlain;
            labelList = labelList20;
            break;
        default:
            blocksname = blocksnamePlain;
            labelList = labelList11;
            break;
        }
    }
    */
    // presumably the file-name prefix of the generated binary data -- confirm against writeJavaData
    static final String dataFilePrefix = "UCD_Data";
    // MAIN!!
    /**
     * Builds the binary UCD data for each version named on the command line.
     * An empty argument stands for {@code UCD.latestVersion}. A UTF-8 log file
     * "UCD-log.txt", starting with a BOM, is opened in GEN_DIR for the duration
     * of the run.
     *
     * @param args version strings, one conversion per argument
     * @throws Exception if the log cannot be opened or a conversion fails
     */
    public static void main (String[] args) throws Exception {
        System.out.println("Building binary version of UCD");
        final FileOutputStream logFile = new FileOutputStream(GEN_DIR + "UCD-log.txt");
        final Writer encoded = new OutputStreamWriter(logFile, "UTF8");
        log = new PrintWriter(new BufferedWriter(encoded, 32*1024));
        log.write("\uFEFF"); // BOM
        try {
            for (int argIndex = 0; argIndex < args.length; ++argIndex) {
                final String requested = args[argIndex];
                final String version = requested.length() == 0 ? UCD.latestVersion : requested;
                new ConvertUCD().toJava(version);
            }
        } finally {
            log.close();
        }
    }
    /*
    static void toXML() throws Exception {
        // Blocks is special
        // Unihan is special
        // collect all the other .txt files in the directory
        if (false) readBlocks();
        if (true) for (int i = 0; i < labelList.length; ++i) {
            readSemi(labelList[i]);
        } else {
            readSemi(labelList[0]); // TESTING ONLY
        }
        writeXML();
    }
    */
    /**
     * Converts one UCD version: parses the dotted version string into
     * major/minor/update, reads every file listed in labelList, compacts the
     * collected per-character data and writes the result.
     *
     * @param version version string of the form major.minor.update
     * @throws Exception if a version component is not a number or reading/writing fails
     */
    void toJava(String version) throws Exception {
        this.version = version;
        final String[] parts = new String[3];
        Utility.split(version, '.', parts);
        major = Integer.parseInt(parts[0]);
        minor = Integer.parseInt(parts[1]);
        update = Integer.parseInt(parts[2]);
        System.out.println("Building " + version);

        // Blocks is special
        // Unihan is special
        // collect all the other .txt files in the directory
        if (false) readBlocks();

        // Read every listed file, in table order.
        // (For testing only, this can be cut down to readSemi(labelList[0]).)
        for (int fileIndex = 0; fileIndex < labelList.length; ++fileIndex) {
            readSemi(labelList[fileIndex]);
        }

        // Compact every collected character record.
        for (Iterator keys = charData.keySet().iterator(); keys.hasNext(); ) {
            final Object key = keys.next();
            final UData record = (UData) charData.get(key);
            record.compact();
        }

        /*
        UData ud;
        ud = getEntry(0x5e);
        System.out.println("SPOT-CHECK: 5e: " + ud);
        ud = getEntry(0x130);
        System.out.println("SPOT-CHECK: 130: " + ud);
        ud = getEntry(0x1f6);
        System.out.println("SPOT-CHECK: 1f6: " + ud);
        ud = getEntry(0x2A6D6);
        System.out.println("SPOT-CHECK: 2A6D6: " + ud);
        ud = getEntry(0xFFFF);
        System.out.println("SPOT-CHECK: FFFF: " + ud);
        */
        writeJavaData();
    }
    static PrintWriter log;
    //static String directory = BASE_DIR;
    //static Map appendDuplicates = new HashMap();
    /** First item in labels is file name, rest are field names (skipping character).
     *  "OMIT" is special -- means don't record
     */
    List blockData = new LinkedList();
    /**
     * Reads the Blocks data file ({@code blocksname}) for the current version
     * and appends one three-element String array per data line to
     * {@code blockData}: the first two fields converted with
     * {@code Utility.fromHex}, the third trimmed. Text after '#' and blank
     * lines are ignored.
     *
     * @throws Exception if a data line does not split into exactly three
     *         ';'-separated fields, or on any read failure; the current line
     *         is echoed to System.out before the exception is rethrown
     */
    void readBlocks() throws Exception {
        System.out.println("Reading 'Blocks'");
        BufferedReader input = Utility.openUnicodeFile(blocksname, version, true, Utility.LATIN1);
        String line = "";
        try {
            String[] fields = new String[20];
            for (int lineNumber = 1; (line = input.readLine()) != null; ++lineNumber) {
                if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'");
                // strip a trailing comment, then surrounding white space
                int hash = line.indexOf('#');
                if (hash >= 0) line = line.substring(0, hash);
                line = line.trim();
                if (line.length() == 0) continue;
                if (Utility.split(line,';',fields) != 3) throw new ChainException("Bad count in Blocks", null);
                String first = Utility.fromHex(fields[0]);
                String last = Utility.fromHex(fields[1]);
                blockData.add(new String[] {first, last, fields[2].trim()});
            }
        } catch (Exception e) {
            System.out.println("Exception at: " + line);
            throw e;
        } finally {
            input.close();
        }
    }
    Set properties = new TreeSet();
    /**
     * Reads one semicolon-delimited data file and records its contents in
     * {@code charData}.
     *
     * <p>{@code labels[0]} is the file name; the remaining entries name the
     * fields that follow the leading code point (or code point range) on each
     * line. Some labels are handled specially: "OMIT" and "RANGE" fields are
     * not recorded, "PROP" in {@code labels[1]} selects the property-list
     * format, "binary" values are stored as binary properties and "fc" values
     * are stored as case foldings according to the type in the preceding
     * field.
     *
     * <p>A line that cannot be parsed is reported on System.err and skipped;
     * a failure to read from the file is reported on System.out and rethrown.
     *
     * @param labels file name followed by the field labels
     * @throws Exception if the file cannot be read
     */
    void readSemi(String[] labels) throws Exception {
        System.out.println();
        System.out.println("Reading '" + labels[0] + "'");
        if (major < 3 || (major == 3 && minor < 1)) {
            // compare by value; == only works when both strings happen to be interned
            if ("PropList".equals(labels[0])) {
                System.out.println("SKIPPING old format of Proplist for " + version);
                return;
            }
        }
        String tempVersion = version;
        if (version.equals(UCD.latestVersion)) tempVersion = "";
        BufferedReader input = Utility.openUnicodeFile(labels[0], tempVersion, true, Utility.LATIN1);
        if (input == null) {
            System.out.println("COULDN'T OPEN: " + labels[0]);
            return;
        }
        boolean showedSemi = false;
        boolean showedShort = false;
        String line = "";
        try {
            String[] parts = new String[20];
            for (int lineNumber = 1; ; ++lineNumber) {
                // Read outside the per-line handler: an I/O failure must end the
                // loop (via the outer catch) instead of being swallowed and retried.
                line = input.readLine();
                if (line == null) break;
                try {
                    if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'");
                    String original = line;
                    int commentPos = line.indexOf('#');
                    if (commentPos >= 0) {
                        line = line.substring(0, commentPos);
                    }
                    line = line.trim();
                    if (line.length() == 0) continue;
                    int count = Utility.split(line,';',parts);
                    // fix malformed or simple lists.
                    if (count != labels.length) {
                        if (count == labels.length + 1 && parts[count-1].equals("")) {
                            if (!showedSemi) System.out.println("Extra semicolon in: " + original);
                            showedSemi = true;
                        } else if (count == 1) { // fix simple list
                            ++count;
                            parts[1] = "Y";
                        } else if (count < labels.length) {
                            if (!showedShort) System.out.println("Line shorter than labels: " + original);
                            showedShort = true;
                            for (int i = count; i < labels.length; ++i) {
                                parts[i] = "";
                            }
                        } else {
                            // report the line number; parsing the line text as an
                            // Integer would only raise a NumberFormatException
                            throw new ChainException("wrong count: {0}",
                                new Object[] {new Integer(lineNumber), new Integer(count)});
                        }
                    }
                    // store char
                    // first field is always character OR range. May be UTF-32
                    int cpTop;
                    int cpStart;
                    int ddot = parts[0].indexOf(".");
                    if (ddot >= 0) {
                        cpStart = UTF32.char32At(Utility.fromHex(parts[0].substring(0,ddot)),0);
                        cpTop = UTF32.char32At(Utility.fromHex(parts[0].substring(ddot+2)),0);
                    } else {
                        cpStart = UTF32.char32At(Utility.fromHex(parts[0]),0);
                        cpTop = cpStart;
                        // NOTE(review): the result of this call is discarded; it looks as if
                        // it was meant to be assigned to cpTop -- confirm before changing.
                        if (labels[1].equals("RANGE")) UTF32.char32At(Utility.fromHex(parts[1]),0);
                    }
                    // properties first
                    if (labels[1].equals("PROP")) {
                        String prop = parts[2].trim();
                        // FIX!!
                        boolean skipLetters = false;
                        if (prop.equals("Alphabetic")) {
                            prop = "Other_Alphabetic";
                            skipLetters = true;
                        }
                        // END FIX!!
                        properties.add(prop);
                        if (Utility.find(prop, UCD_Names.DeletedProperties, true) == -1) { // only undeleted
                            int end = UTF32.char32At(Utility.fromHex(parts[1]),0);
                            if (end == 0) end = cpStart;
                            for (int j = cpStart; j <= end; ++j) {
                                if (j != UCD.mapToRepresentative(j, Integer.MAX_VALUE)) continue;
                                // NOTE(review): tests the first code point of the range for every j;
                                // possibly getEntry(j) was intended -- confirm.
                                if (skipLetters && getEntry(cpStart).isLetter()) continue;
                                appendCharProperties(j, prop);
                            }
                        }
                    } else { // not range!
                        String val = "";
                        for (int i = 1; i < labels.length; ++i) {
                            String key = labels[i];
                            if (isHex.get(key) != null) {
                                val = Utility.fromHex(parts[i]);
                            } else {
                                val = parts[i].trim();
                            }
                            if (key.equals("OMIT")) continue;
                            if (key.equals("RANGE")) continue;
                            if (val.equals("")) continue; // skip empty values, they mean default
                            for (int cps = cpStart; cps <= cpTop; ++cps) {
                                if (UCD.mapToRepresentative(cps, Integer.MAX_VALUE) != cps) continue;    // skip condensed ranges
                                if (key.equals("binary")) {
                                    appendCharProperties(cps, val);
                                } else if (key.equals("fc")) {
                                    UData data = getEntry(cps);
                                    // the folding type is the field just before the mapping
                                    String type = parts[i-1].trim();
                                    if (type.equals("F") || type.equals("C") || type.equals("E") || type.equals("L")) {
                                        data.fullCaseFolding = val;
                                    }
                                    if (type.equals("S") || type.equals("C") || type.equals("L")) {
                                        data.simpleCaseFolding = val;
                                    }
                                    if (type.equals("I")) {
                                        data.simpleCaseFolding = val;
                                        setBinaryProperty(cps, CaseFoldTurkishI);
                                        if (DEBUG) System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting "
                                            + Utility.hex(cps) + ": " + Utility.hex(val));
                                    }
                                } else if (labels[0].equals("SpecialCasing")   // special handling for special casing
                                            && labels[4].equals("sc")
                                            && parts[4].trim().length() > 0) {
                                    if (i < 4) {
                                        if (DEBUG) System.out.println("Got special: " + Utility.hex(cps) + ", "
                                            + Utility.hex(key) + ":" + Utility.hex(val));
                                        addCharData(cps, "sc", parts[4].trim() + ":" + key + ":" + val);
                                    }
                                } else {
                                    addCharData(cps, key, val);
                                }
                            }
                        }
                    }
                } catch (Exception e) {
                    // best effort: report the bad line and carry on with the next one
                    System.err.println("*Exception at: " + line + ", " + e.getMessage());
                }
            }
        } catch (Exception e) {
            System.out.println("Exception at: " + line + ", " + e.getMessage());
            throw e;
        } finally {
            input.close();
        }
    }
    /**
     * Prints Java source text for a set of value names to System.out: first
     * a String array named {@code title} listing the values, then a list of
     * byte constants numbered from 0 in iteration order, closed by a
     * {@code LIMIT_<title>} constant holding the number of values. Constant
     * names are the values upper-cased with spaces replaced by '-'.
     *
     * @param title name used for the array and the LIMIT_ constant
     * @param s set of String values
     */
    static void printValues(String title, Set s) {
        System.out.println("public static String[] " + title + " = {");
        for (Iterator names = s.iterator(); names.hasNext();) {
            String name = (String) names.next();
            System.out.println("    \"" + name + "\",");
        }
        System.out.println("};");

        System.out.println("public static byte ");
        int ordinal = 0;
        for (Iterator names = s.iterator(); names.hasNext(); ++ordinal) {
            String constant = ((String) names.next()).replace(' ', '-').toUpperCase();
            System.out.println("    " + constant + " = " + ordinal + ",");
        }
        System.out.println("    LIMIT_" + title + " = " + ordinal);
        System.out.println(";");
    }
    Map charData = new TreeMap();
    /*
    static void writeXML() throws IOException {
        System.out.println("Writing 'UCD-Main.xml'");
        BufferedWriter output = new BufferedWriter(
            new OutputStreamWriter(
                new FileOutputStream(UCD.BIN_DIR + "UCD_Data.xml"),
                "UTF8"),
            32*1024);
        try {
            // write header
            output.write("<?xml version='1.0' encoding='utf-8'?>\r\n");
            output.write("<UnicodeCharacterDatabase>\r\n");
            output.write(" <!-- IMPORTANT: see UCD-Notes.html for information on the format. This file CANNOT be read correctly without that information. -->\r\n");
            output.write(" <unicode version='" + major + "' minor='" + minor + "' update='" + update + "'/>\r\n");
            output.write(" <fileVersion status='DRAFT' date='" + new Date() + "'/>\r\n");
            // write blocks
            Iterator it = blockData.iterator();
            while (it.hasNext()) {
                String[] block = (String[]) it.next();
                output.write(" <block start='" + Utility.quoteXML(block[0])
                    + "' end='" + Utility.quoteXML(block[1])
                    + "' name='" + Utility.quoteXML(block[2])
                    + "'/>\r\n" );
            }
            // write char data
            it = charData.keySet().iterator();
            while (it.hasNext()) {
                Integer cc = (Integer) it.next();
                output.write(" <e c='" + Utility.quoteXML(cc.intValue()) + "'");
                /*
                UData data = (UData) charData.get(cc);
                Iterator dataIt = data.keySet().iterator();
                while (dataIt.hasNext()) {
                    String label = (String) dataIt.next();
                    if (label.equals("c")) continue; // already wrote it.
                    if (label.equals("fc")) {
                        String fc = getResolved(data, "fc");
                        String lc = getResolved(data, "lc");
                        if (!fc.equals(lc) && !lc.equals(cc)) log.println("FC " + fc.length() + ": " + toString(cc));
                    }
                    String value = Utility.quoteXML((String) data.get(label));
                    output.write(" " + label + "='" + value + "'");
                }
                *//*
                output.write("/>\r\n");
            }
            // write footer
            output.write("</UnicodeCharacterDatabase>\r\n");
        } finally {
            output.close();
        }
    }
    */
    /**
     * Writes all collected character records to the binary data file
     * {@code UCD.BIN_DIR + dataFilePrefix + version + ".bin"}.
     *
     * <p>The file starts with a header (format byte, major, minor and update
     * version bytes, the current time in milliseconds as a long, and the
     * number of records as an int), followed by each UData record in the
     * iteration order of {@code charData}. The stream is closed on every
     * exit path, including a failure while writing the header.
     *
     * @throws IOException if the file cannot be opened or the header cannot
     *         be written
     * @throws ChainException if writing a record fails; the message carries
     *         the code point being written, when it is known
     */
    void writeJavaData() throws IOException {
        Iterator it = charData.keySet().iterator();
        int codePoint = -1;
        System.out.println("Writing " + dataFilePrefix + version);
        DataOutputStream dataOut = new DataOutputStream(
            new BufferedOutputStream(
                new FileOutputStream(UCD.BIN_DIR +  dataFilePrefix + version + ".bin"),
                128*1024));
        try {
            // write header
            dataOut.writeByte(BINARY_FORMAT);
            dataOut.writeByte(major);
            dataOut.writeByte(minor);
            dataOut.writeByte(update);
            long millis = System.currentTimeMillis();
            dataOut.writeLong(millis);
            dataOut.writeInt(charData.size());
            System.out.println("Data Size: " + NumberFormat.getInstance().format(charData.size()));
            int count = 0;
            // write records
            try {
                while (it.hasNext()) {
                    Object cc = it.next();
                    // remember the current key so a failure can name the code point
                    if (cc instanceof Integer) codePoint = ((Integer) cc).intValue();
                    if (DEBUG) System.out.println(Utility.hex(cc));
                    UData uData = (UData) charData.get(cc);
                    uData.writeBytes(dataOut);
                    count++;
                    if (DEBUG) System.out.println("Setting2");
                }
                System.out.println("Wrote Data " + count);
            } catch (Exception e) {
                throw new ChainException("Bad data write {0}", new Object [] {Utility.hex(codePoint)}, e);
            }
        } finally {
            dataOut.close();
        }
    }
    //static String[] xsSplit = new String[40];
    // Cache a little bit for speed
    int getEntryCodePoint = -1;
    UData getEntryUData = null;
    /**
     * Looks up the record for a code point without creating one. A successful
     * lookup refreshes the one-entry cache ({@code getEntryCodePoint} /
     * {@code getEntryUData}); a miss leaves the cache untouched.
     *
     * @param cp code point to look up
     * @return the stored record, or null when {@code charData} has none for cp
     */
    UData getEntryIfExists(int cp) {
        if (cp != getEntryCodePoint) {
            UData found = (UData) charData.get(new Integer(cp));
            if (found == null) return null;
            getEntryCodePoint = cp;
            getEntryUData = found;
        }
        return getEntryUData;
    }
    /**
     * Returns the record for a code point, creating a new {@code UData(cp)}
     * and storing it in {@code charData} when none exists yet. The result is
     * remembered in the one-entry cache so that repeated requests for the
     * same code point skip the map lookup.
     *
     * @param cp code point whose record is wanted
     * @return the existing or newly created record
     */
    UData getEntry(int cp) {
        if (cp != getEntryCodePoint) {
            Integer key = new Integer(cp);
            UData entry = (UData) charData.get(key);
            if (entry == null) {
                entry = new UData(cp);
                charData.put(key, entry);
            }
            getEntryCodePoint = cp;
            getEntryUData = entry;
        }
        return getEntryUData;
    }
    /**
     * Turns on one bit in the binary-property mask of a code point's record,
     * creating the record if necessary.
     *
     * @param cp code point to modify
     * @param binProp index of the bit to set in {@code binaryProperties}
     */
    void setBinaryProperty(int cp, int binProp) {
        long mask = 1L << binProp;
        getEntry(cp).binaryProperties |= mask;
    }
    /**
     * Sets the binary property named {@code key} on a code point. The name
     * is resolved to a bit index through {@code UCD_Names.BP}.
     *
     * @param cp code point to modify
     * @param key binary property name
     */
    void appendCharProperties(int cp, String key) {
        setBinaryProperty(cp, Utility.lookup(key, UCD_Names.BP, true));
    }
    Set jtSet = new TreeSet();
    Set jgSet = new TreeSet();
    /**
     * Stores one field value read from a data file in the record of a code
     * point. Most keys are handed straight to
     * {@link #setField(UData, String, String)}; the following get extra
     * treatment first:
     * <ul>
     * <li>"bm" and "ce" set bits 1 and 2 of the binary-property mask
     *     ("bm" only for the value "Y");</li>
     * <li>"on" replaces a name that starts with '&lt;' by the value wrapped
     *     in angle brackets;</li>
     * <li>"dm" splits an optional leading {@code <type>} tag off the mapping
     *     (stored as "dt"), trims old-style suffixes for versions before 2,
     *     and stores the hex-decoded mapping;</li>
     * <li>"dd", "dv" and "nv" raise the numeric type to at least DECIMAL,
     *     DIGIT or NUMERIC respectively and store the value as "nv".</li>
     * </ul>
     *
     * @param cp code point the value belongs to
     * @param key field label
     * @param value field value as read from the file
     */
    void addCharData(int cp, String key, String value) {
        final boolean traceSample = SHOW_SAMPLE && cp == 0x221;
        UData entry = getEntry(cp);
        if (traceSample) {
            System.out.println("Sample: " + cp + ", " + key + ", " + value);
            System.out.println(entry);
        }

        if (key.equals("bm")) {
            if (value.equals("Y")) entry.binaryProperties |= 1;
        } else if (key.equals("ce")) {
            entry.binaryProperties |= 2;
        } else if (key.equals("on")) {
            if (entry.name.charAt(0) == '<') {
                entry.name = '<' + value + '>';
            }
        } else if (key.equals("dm")) {
            entry.decompositionType = CANONICAL;
            String mapping = value;
            if (mapping.charAt(0) == '<') {
                // leading <tag> names the decomposition type
                int close = mapping.indexOf('>');
                String tag = mapping.substring(1, close);
                if (major < 2 && tag.charAt(0) == '+') tag = tag.substring(1);
                mapping = mapping.substring(close + 1);
                setField(entry, "dt", tag);
            }
            // FIX OLD
            if (major < 2) {
                int cut = mapping.indexOf('<');
                if (cut > 0) mapping = mapping.substring(0, cut);
                cut = mapping.indexOf('{');
                if (cut > 0) mapping = mapping.substring(0, cut);
            }
            setField(entry, key, Utility.fromHex(mapping));
        } else if (key.equals("dd")) {
            // fix the numeric fields to be more sensible
            if (entry.numericType < UCD_Types.DECIMAL) entry.numericType = UCD_Types.DECIMAL;
            setField(entry, "nv", value);
        } else if (key.equals("dv")) {
            if (entry.numericType < UCD_Types.DIGIT) entry.numericType = UCD_Types.DIGIT;
            setField(entry, "nv", value);
        } else if (key.equals("nv")) {
            if (entry.numericType < UCD_Types.NUMERIC) entry.numericType = UCD_Types.NUMERIC;
            setField(entry, "nv", value);
        } else {
            setField(entry, key, value);
        }

        if (traceSample) {
            System.out.println("Sample Result:");
            System.out.println(entry);
        }
    }
    /**
     * Stores a single field value in a character record, converting the text
     * to the record's internal representation where needed (enumerated
     * values are looked up in the {@code UCD_Names} tables, numbers are
     * parsed).
     *
     * <p>Special cases visible here: "sc" values are appended to the existing
     * special-casing string with ';' as separator; "dt" names used before
     * version 2 are mapped to their current names; "jg" is looked up in the
     * old joining-group names first; "nv" ignores "-" before version 2; and
     * "cc" equal to 9 also sets the Grapheme_Link bit from version 5 on.
     *
     * @param uData record to modify
     * @param fieldName short field label such as "n", "gc" or "dm"
     * @param fieldValue value as text
     * @throws ChainException if the field name is unknown or the value cannot
     *         be converted; the original exception is attached as the cause
     */
    public void setField(UData uData, String fieldName, String fieldValue) {
        try {
            if (fieldName.equals("n")) {
                uData.name = fieldValue;
            } else if (fieldName.equals("dm")) {
                uData.decompositionMapping = fieldValue;
            } else if (fieldName.equals("bg")) {
                uData.bidiMirror = fieldValue;
            } else if (fieldName.equals("uc")) {
                uData.simpleUppercase = fieldValue;
            } else if (fieldName.equals("lc")) {
                uData.simpleLowercase = fieldValue;
            } else if (fieldName.equals("tc")) {
                uData.simpleTitlecase = fieldValue;
            } else if (fieldName.equals("su")) {
                uData.fullUppercase = fieldValue;
            } else if (fieldName.equals("sl")) {
                if (DEBUG) System.out.println("Setting full lowercase to " + Utility.hex(fieldValue) + uData);
                uData.fullLowercase = fieldValue;
            } else if (fieldName.equals("st")) {
                uData.fullTitlecase = fieldValue;
            } else if (fieldName.equals("sc")) {
                // several special-casing entries accumulate, separated by ';'
                if (uData.specialCasing.length() > 0) {
                    uData.specialCasing += ";";
                }
                uData.specialCasing += fieldValue;
            } else if (fieldName.equals("xp")) {
                uData.binaryProperties |= 1L << Utility.lookup(fieldValue, UCD_Names.BP, true);
            } else if (fieldName.equals("gc")) {
                uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GENERAL_CATEGORY, true);
            } else if (fieldName.equals("bc")) {
                uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BIDI_CLASS, true);
            } else if (fieldName.equals("dt")) {
                if (major < 2) {
                    // map decomposition type names used by the oldest files
                    if (fieldValue.equals("no-break")) fieldValue = "noBreak";
                    else if (fieldValue.equals("circled")) fieldValue = "circle";
                    else if (fieldValue.equals("sup")) fieldValue = "super";
                    else if (fieldValue.equals("break")) fieldValue = "compat";
                    else if (fieldValue.equals("font variant")) fieldValue = "font";
                    else if (fieldValue.equals("no-join")) fieldValue = "compat";
                    else if (fieldValue.equals("join")) fieldValue = "compat";
                }
                uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.LONG_DECOMPOSITION_TYPE, true);
            } else if (fieldName.equals("nt")) {
                uData.numericType = Utility.lookup(fieldValue, UCD_Names.LONG_NUMERIC_TYPE, true);
            } else if (fieldName.equals("ea")) {
                uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EAST_ASIAN_WIDTH, true);
            } else if (fieldName.equals("lb")) {
                uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LINE_BREAK, true);
            } else if (fieldName.equals("sn")) {
                uData.script = Utility.lookup(fieldValue, UCD_Names.LONG_SCRIPT, true);
            } else if (fieldName.equals("jt")) {
                uData.joiningType = Utility.lookup(fieldValue, UCD_Names.JOINING_TYPE, true);
            } else if (fieldName.equals("jg")) {
                // try the old joining-group names first, then the current ones
                byte temp = (byte)Utility.find(fieldValue, UCD_Names.OLD_JOINING_GROUP, true);
                if (temp != -1) uData.joiningGroup = temp;
                else uData.joiningGroup = Utility.lookup(fieldValue, UCD_Names.JOINING_GROUP, true);
            } else if (fieldName.equals("nv")) {
                if (major < 2) {
                    if (fieldValue.equals("-")) return;
                }
                uData.numericValue = Utility.doubleFrom(fieldValue);
            } else if (fieldName.equals("cc")) {
                uData.combiningClass = (byte)Utility.intFrom(fieldValue);
                if (uData.combiningClass == 9 && major >= 5) {
                    System.out.println("setting Grapheme_Link " + Utility.hex(uData.codePoint) + "\t" + uData.name);
                    // long shift, as everywhere else the mask is built: an int shift
                    // wraps or sign-extends once the bit index reaches 31
                    uData.binaryProperties |= (1L << GraphemeLink);
                    System.out.println(uData);
                }
            } else if (fieldName.equals("bp")) {
                // NOTE(review): the (byte) cast keeps only the low 8 bits (sign-extended)
                // of the parsed value; looks like a leftover -- confirm before changing.
                uData.binaryProperties = (byte)Utility.longFrom(fieldValue);
                System.out.println("Resetting: " + uData);
            } else {
                throw new IllegalArgumentException("Unknown fieldName");
            }
        } catch (Exception e) {
            throw new ChainException(
            "Bad field name= \"{0}\", value= \"{1}\"", new Object[] {fieldName, fieldValue}, e);
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/Default.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Default.java
@ -1,93 +0,0 @@
 package com.ibm.text.UCD;
 import com.ibm.text.utility.*;
 import java.util.Date;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.TimeZone;
 /**
  * Holder for the default UCD instance and the four normalizers built for it.
  * Everything is created lazily on first access, for the version given to
  * {@link #setUCD(String)} or, failing that, {@code UCD.latestVersion}.
  */
 public final class Default implements UCD_Types {
    private static String ucdVersion = UCD.latestVersion;
    private static UCD ucd;
    private static Normalizer nfc;
    private static Normalizer nfd;
    private static Normalizer nfkc;
    private static Normalizer nfkd;
    private static Normalizer[] nf = new Normalizer[4];
    private static String year;

    /**
     * Selects the UCD version and (re)loads the UCD and the normalizers.
     *
     * @param version version string passed to {@code UCD.make}
     */
    public static void setUCD(String version) {
        ucdVersion = version;
        setUCD();
    }

    /** True while setUCD() is running; guards against re-entry. */
    private static boolean inRecursiveCall = false;

    /**
     * Loads the UCD for {@code ucdVersion} and builds the normalizers.
     *
     * @throws IllegalArgumentException if called again while already running
     */
    private static void setUCD() {
        if (inRecursiveCall) {
            throw new IllegalArgumentException("Recursive call to setUCD");
        }
        inRecursiveCall = true;
        try {
            ucd = UCD.make(ucdVersion);
            nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdVersion());
            nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdVersion());
            nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdVersion());
            nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdVersion());
            System.out.println("Loaded UCD" + ucd().getVersion() + " " + (new Date(ucd().getDate())));
        } finally {
            // Always clear the guard: otherwise one failed load would make every
            // later call report "Recursive call" and hide the real error.
            inRecursiveCall = false;
        }
    }

    static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd', 'HH:mm:ss' GMT'");
    static DateFormat yearFormat = new SimpleDateFormat("yyyy");
    static {
        myDateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
        year = yearFormat.format(new Date());
    }

    /** Returns the current time formatted as "yyyy-MM-dd, HH:mm:ss GMT". */
    public static String getDate() {
        return myDateFormat.format(new Date());
    }

    /** Returns the year captured at class initialization, or the value given to setYear. */
    public static String getYear() {
        return year;
    }

    /** Returns the selected UCD version, loading the UCD first if necessary. */
    public static String ucdVersion() {
        if (ucd == null) setUCD();
        return ucdVersion;
    }

    /** Returns the default UCD, loading it first if necessary. */
    public static UCD ucd() {
        if (ucd == null) setUCD();
        return ucd;
    }

    /** Returns the NFC normalizer for the default UCD. */
    public static Normalizer nfc() {
        if (ucd == null) setUCD();
        return nfc;
    }

    /** Returns the NFD normalizer for the default UCD. */
    public static Normalizer nfd() {
        if (ucd == null) setUCD();
        return nfd;
    }

    /** Returns the NFKC normalizer for the default UCD. */
    public static Normalizer nfkc() {
        if (ucd == null) setUCD();
        return nfkc;
    }

    /** Returns the NFKD normalizer for the default UCD. */
    public static Normalizer nfkd() {
        if (ucd == null) setUCD();
        return nfkd;
    }

    /**
     * Returns a normalizer by index.
     *
     * @param index one of the NFD, NFC, NFKD, NFKC constants
     */
    public static Normalizer nf(int index) {
        if (ucd == null) setUCD();
        return nf[index];
    }

    /**
     * Overrides the year reported by {@link #getYear()}.
     *
     * @param lineValue year text to report
     */
    public static void setYear(String lineValue) {
        year = lineValue;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedAgeHeader.txt
@ -1,29 +0,0 @@
 #
 # Unicode Character Database: Derived Property Data
 # This file shows when various code points were first assigned in Unicode.
 #
 # Caution: When using the Age *property*, all assigned code points
 # in each version are included, not just the newly assigned code points.
 # For more information, see http://www.unicode.org/reports/tr18/
 #
 # Notes:
 #
 # - The term 'assigned' means that a previously reserved code point was assigned
 #   to be a character (graphic, format, control, or private-use); 
 #   a noncharacter code point; or a surrogate code point.
 #   For more information, see The Unicode Standard Section 2.4
 #
 # - Versions are only tracked from 1.1 onwards, since version 1.0
 #   predated changes required by the ISO 10646 merger.
 #
 # - The Hangul Syllables that were removed from 2.0 are not included in the 1.1 listing.
 #
 # - The supplementary private use code points and the noncharacter code points
 #   were assigned in version 2.0, but not specifically listed in the UCD
 #   until versions 3.0 and 3.1 respectively.
 #
 # - Contiguous ranges are broken into separate lines where they would cross code point
 #   types: graphic, format, control, private-use, surrogate, noncharacter
 #
 # For details on the contents of each version, see
 #   http://www.unicode.org/versions/enumeratedversions.html.
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
@ -1,982 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
 * $Date: 2004/03/11 19:03:17 $
 * $Revision: 1.26 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import java.util.*;
 import java.io.PrintWriter;
 public final class DerivedProperty implements UCD_Types {
    // The UCD this set of derived properties is computed from; assigned in the constructor.
    UCD ucdData;
    // One normalizer per normalization form, created in the constructor
    // for the version reported by ucdData.
    Normalizer nfc;
    Normalizer nfd;
    Normalizer nfkc;
    Normalizer nfkd;
    // The same four normalizers, stored under the NFD, NFC, NFKD and NFKC index constants.
    Normalizer[] nf = new Normalizer[4];
    // Code point sets backing the XID_Start and XID_Continue properties;
    // both are filled by the constructor.
    UnicodeSet XID_Start_Set = new UnicodeSet();
    UnicodeSet XID_Continue_Set = new UnicodeSet();
    // ADD CONSTANT to UCD_TYPES
    /**
     * Looks up a derived property using the UCD returned by {@code Default.ucd()}.
     *
     * @param derivedPropertyID ID of the wanted derived property
     * @return whatever {@code make(derivedPropertyID, Default.ucd())} returns
     */
    public static UCDProperty make(int derivedPropertyID) {
        UCD defaultUcd = Default.ucd();
        return make(derivedPropertyID, defaultUcd);
    }
    /**
     * Looks up a derived property for a specific UCD.
     * The per-UCD {@code DerivedProperty} instance is obtained through {@code getCached(ucd)}.
     *
     * @param derivedPropertyID ID of the wanted derived property
     * @param ucd the UCD whose derived properties are wanted
     * @return the table entry for the ID, or null when the ID is negative or
     *         not below {@code DERIVED_PROPERTY_LIMIT}; the entry itself is
     *         whatever the constructor stored in that slot
     */
    public static UCDProperty make(int derivedPropertyID, UCD ucd) {
        boolean inRange = 0 <= derivedPropertyID && derivedPropertyID < DERIVED_PROPERTY_LIMIT;
        if (!inRange) {
            return null;
        }
        DerivedProperty owner = getCached(ucd);
        return owner.dprops[derivedPropertyID];
    }
    ///////////////////////////////////////////////////////////
    // Maps each UCD to the DerivedProperty built for it; read and filled by getCached().
    static Map cache = new HashMap();
    // The UCD most recently passed to getCached() and the DerivedProperty returned for it;
    // getCached() checks this pair before consulting the map.
    static UCD lastUCD = null;
    static DerivedProperty lastValue = null;
    /**
     * Returns the DerivedProperty instance for the given UCD, creating it on first use.
     * The most recent (UCD, instance) pair is remembered so that repeated requests
     * for the same UCD skip the map lookup.
     *
     * @param ucd the UCD to look up; must not be null, since {@code equals} is called on it
     * @return the cached or newly created instance for {@code ucd}
     */
    private static DerivedProperty getCached(UCD ucd) {
        if (ucd.equals(lastUCD)) {
            return lastValue;
        }
        DerivedProperty found = (DerivedProperty) cache.get(ucd);
        if (found == null) {
            found = new DerivedProperty(ucd);
            cache.put(ucd, found);
        }
        // Remember this pair for the next call.
        lastValue = found;
        lastUCD = ucd;
        return found;
    }
    /*
    public String getHeader(int propNumber) {
        UnicodeProperty dp = dprops[propNumber];
        if (dp != null) return dp.getHeader();
        else return "Unimplemented!!";
    }
    public String getName(int propNumber, byte style) {
        UnicodeProperty dp = dprops[propNumber];
        if (dp != null) return dp.getName(style);
        else return "Unimplemented!!";
    }
    public String getValue(int cp, int propNumber) {
        UnicodeProperty dp = dprops[propNumber];
        if (dp != null) return dp.getValue(cp);
        else return "Unimplemented!!";
    }
    public boolean isTest(int propNumber) {
        if (!isDefined(propNumber)) return false;
        return dprops[propNumber].isTest();
    }
    public boolean hasProperty(int cp, int propNumber) {
        if (!isDefined(propNumber)) return false;
        return dprops[propNumber].hasProperty(cp);
    }
    public boolean valueVaries(int propNumber) {
        return dprops[propNumber].valueVaries();
    }
    /*
    public String getValue(int cp, int propNumber) {
        return dprops[propNumber].getValue(int cp);
    }
    */
    // Table of derived properties indexed by derived-property ID (see make());
    // the slots are filled in by the constructor.
    // NOTE(review): the capacity is a hard-coded 50, while make() bounds IDs by
    // DERIVED_PROPERTY_LIMIT -- confirm that the limit never exceeds 50.
    private UCDProperty[] dprops = new UCDProperty[50];
    // Name fragments used by CaseDProp, indexed by (property ID - Missing_Uppercase).
    static final String[] CaseNames = {
                "Uppercase", 
                "Lowercase", 
                "Mixedcase"};
    /**
     * Derived property "Expands_On_" + form name: true for code points that have a
     * decomposition and whose normalization under the chosen form is not exactly
     * one code point long.
     */
    class ExDProp extends UCDProperty {
        /** Normalizer for the form this property describes. */
        Normalizer nfx;

        /**
         * @param i index into the enclosing instance's {@code nf} array
         */
        ExDProp(int i) {
            type = DERIVED_NORMALIZATION;
            nfx = nf[i];
            String formName = nfx.getName();
            name = "Expands_On_" + formName;
            shortName = "XO_" + formName;
            header = "# Derived Property: " + name
                + "\r\n#   Generated according to UAX #15."
                + "\r\n#   Characters whose normalized length is not one."
                + "\r\n#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
                + "\r\n#            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!";
        }

        /**
         * @param cp code point to test
         * @return true when cp has a decomposition type other than NONE and its
         *         normalized form does not consist of exactly one code point
         */
        public boolean hasValue(int cp) {
            if (ucdData.getDecompositionType(cp) == NONE) {
                return false;
            }
            String normalized = nfx.normalize(cp);
            return UTF16.countCodePoint(normalized) != 1;
        }
    };
    /**
     * Non-standard derived property (form name + "_UnsafeStart"): code points with
     * combining class 0 whose normalized form nevertheless starts with something
     * that may interact with preceding text.
     */
    class NF_UnsafeStartProp extends UCDProperty {
        /** Normalizer for the form this property describes. */
        Normalizer nfx;

        /**
         * @param i index into the enclosing instance's {@code nf} array
         */
        NF_UnsafeStartProp(int i) {
            isStandard = false;
            type = DERIVED_NORMALIZATION;
            nfx = nf[i];
            String formName = nfx.getName();
            name = formName + "_UnsafeStart";
            shortName = formName + "_SS";
            header = "# Derived Property: " + name
                + "\r\n#   Generated according to UAX #15."
                + "\r\n#   Characters that are cc==0, BUT which may interact with previous characters.";
        }

        /**
         * @param cp code point to test
         * @return false when cp itself has a non-zero combining class; otherwise true
         *         when the first code point of its normalized form has a non-zero
         *         combining class, or (for composing normalizers only) when that
         *         first code point has the NFC_TrailingZero property
         */
        public boolean hasValue(int cp) {
            if (ucdData.getCombiningClass(cp) != 0) {
                return false;
            }
            int first = UTF16.charAt(nfx.normalize(cp), 0);
            if (ucdData.getCombiningClass(first) != 0) {
                return true;
            }
            // Only composing forms can combine a cc==0 character with what precedes it.
            return nfx.isComposition() && dprops[NFC_TrailingZero].hasValue(first);
        }
    };
    /*
    class HangulSyllableType extends UnicodeProperty {
        Normalizer nfx;
        //int prop;
        HangulSyllableType(int i) {
            isStandard = false;
            type = DERIVED_NORMALIZATION;
            nfx = nf[i];
            name = nfx.getName() + "_UnsafeStart";
            shortName = nfx.getName() + "_SS";
            header = "# Derived Property: " + name
                + "\r\n#   Generated according to UAX #15."
                + "\r\n#   Characters that are cc==0, BUT which may interact with previous characters."
                ;
        }
        public boolean hasValue(int cp) {
            if (ucdData.getCombiningClass(cp) != 0) return false;
            String norm = nfx.normalize(cp);
            int first = UTF16.charAt(norm, 0);
            if (ucdData.getCombiningClass(first) != 0) return true;
            if (nfx.isComposition()
                && dprops[NFC_TrailingZero].hasValue(first)) return true; // 1,3 == composing
            return false;
        }
    };
    */
    /**
     * Non-standard derived properties describing how a code point takes part in NFC
     * composition: NFC_Leading, NFC_TrailingNonZero, NFC_TrailingZero and NFC_Resulting.
     * The membership data comes from {@code nfc.getCompositionStatus}; the two
     * "trailing" variants share one bit set and are told apart by combining class.
     */
    class NFC_Prop extends UCDProperty {
        /** Bit set holding the code points reported for this property's category. */
        BitSet bitset;
        /** True for the two trailing variants, which filter further by combining class. */
        boolean filter = false;
        /** For trailing variants: keep code points with non-zero (true) or zero (false) combining class. */
        boolean keepNonZero = true;

        /**
         * @param i one of NFC_Leading, NFC_TrailingNonZero, NFC_TrailingZero, NFC_Resulting
         */
        NFC_Prop(int i) {
            isStandard = false;
            type = DERIVED_NORMALIZATION;
            // Only the set that belongs to this property is allocated; the other
            // two arguments to getCompositionStatus stay null.
            BitSet leading = null;
            BitSet trailing = null;
            BitSet resulting = null;
            switch (i) {
                case NFC_Leading:
                    leading = bitset = new BitSet();
                    break;
                case NFC_Resulting:
                    resulting = bitset = new BitSet();
                    break;
                case NFC_TrailingZero:
                    keepNonZero = false;
                    // FALL THRU
                case NFC_TrailingNonZero:
                    trailing = bitset = new BitSet();
                    break;
            }
            filter = trailing != null;
            nfc.getCompositionStatus(leading, trailing, resulting);
            int offset = i - NFC_Leading;
            name = Names[offset];
            shortName = SNames[offset];
            header = "# Derived Property: " + name
                + "\r\n#   " + Description[offset]
                + "\r\n#   NFKC characters are the same, after subtracting the NFKD = NO values."
                + "\r\n#   Generated according to UAX #15."
                + "\r\n#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
                + "\r\n#            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!";
        }

        /**
         * @param cp code point to test
         * @return true when cp is in this property's bit set and, for the trailing
         *         variants, its combining class is non-zero exactly when keepNonZero is set
         */
        public boolean hasValue(int cp) {
            if (!bitset.get(cp)) {
                return false;
            }
            if (!filter) {
                return true;
            }
            boolean nonZeroClass = ucdData.getCombiningClass(cp) != 0;
            return nonZeroClass == keepNonZero;
        }

        // The three tables below are indexed by (property ID - NFC_Leading).
        final String[] Names = {"NFC_Leading", "NFC_TrailingNonZero", "NFC_TrailingZero", "NFC_Resulting"};
        final String[] SNames = {"NFC_L", "NFC_TNZ", "NFC_TZ", "NFC_R"};
        final String[] Description = {
            "Characters that can combine with following characters in NFC",
            "Characters that can combine with previous characters in NFC, and have non-zero combining class",
            "Characters that can combine with previous characters in NFC, and have zero combining class",
            "Characters that can result from a combination of other characters in NFC",
        };
    };
    /**
     * Non-standard string-valued derived property listing the normalized form of a
     * code point, but only when that form differs from a reference form: the
     * character itself, or its NFC / NFD normalization, depending on the index
     * passed to the constructor.
     */
    class GenDProp extends UCDProperty {
        /** Normalizer for the form this property lists. */
        Normalizer nfx;
        /** Normalizer producing the reference form, or null to compare against the character itself. */
        Normalizer nfComp = null;

        /**
         * @param i index into the enclosing instance's {@code nf} array
         */
        GenDProp (int i) {
            isStandard = false;
            setValueType(STRING_PROP);
            type = DERIVED_NORMALIZATION;
            nfx = nf[i];
            name = nfx.getName();
            String compName = "the character itself";
            if (i == NFKC || i == NFD) {
                name += "-NFC";
                nfComp = nfc;
                compName = "NFC for the character";
            } else if (i == NFKD) {
                name += "-NFD";
                nfComp = nfd;
                compName = "NFD for the character";
            }
            // The header text is emitted verbatim into generated output, so it is left exactly as is.
            header = "# Derived Property: " + name              
                + "\r\n#   Lists characters in normalized form " + nfx.getName() + "."
                + "\r\n#   Only those characters whith normalized forms are DIFFERENT from " + compName + " are listed!"
                + "\r\n#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
                + "\r\n#            It is NOT sufficient to replace characters one-by-one with these results!";
        }

        // One-entry cache of the last computed value. The sentinel is -1, which is
        // not a code point, so the first query for U+0000 is really computed instead
        // of being answered from the empty initial cache.
        int cacheCp = -1;
        String cacheStr = "";

        /**
         * Computes the listing value for a code point.
         *
         * @param cp code point to look up
         * @param style not used by this implementation
         * @return name + "; " + hex of the normalized form, padded with spaces, when cp
         *         has a decomposition and its normalized form differs from the
         *         reference form; otherwise the empty string
         */
        public String getValue(int cp, byte style) {
            if (cacheCp == cp) return cacheStr;
            cacheCp = cp;
            cacheStr = "";
            if (ucdData.getDecompositionType(cp) != NONE) {
                String cps = UTF32.valueOf32(cp);
                String comp = cps;
                if (nfComp != null) {
                    comp = nfComp.normalize(comp);
                }
                String normal = nfx.normalize(cps);
                if (!comp.equals(normal)) {
                    String norm = Utility.hex(normal);
                    String pad = Utility.repeat(" ", 14-norm.length());
                    cacheStr = name + "; " + norm + pad;
                }
            }
            return cacheStr;
            //if (cp >= 0xAC00 && cp <= 0xD7A3) return true;
            //System.out.println(Utility.hex(cps) + " => " + Utility.hex(nf[i-4].normalize(cps)));
        } // default

        /**
         * @param cp code point to test
         * @return true when the value for cp is a non-empty string
         */
        public boolean hasValue(int cp) { return getValue(cp).length() != 0; }
    };
    /**
     * Non-standard derived properties "Possible_Missing_" + Uppercase / Lowercase /
     * Mixedcase: code points that are not themselves of the target general category
     * but whose decomposition category (as reported by getDecompCat) is.
     */
    class CaseDProp extends UCDProperty {
        /** Target general category: Lu, Ll or Lt. */
        byte val;

        /**
         * @param i Missing_Uppercase selects Lu, Missing_Lowercase selects Ll,
         *          any other value selects Lt
         */
        CaseDProp (int i) {
            type = DERIVED_CORE;
            isStandard = false;
            if (i == Missing_Uppercase) {
                val = Lu;
            } else if (i == Missing_Lowercase) {
                val = Ll;
            } else {
                val = Lt;
            }
            String caseName = CaseNames[i-Missing_Uppercase];
            name = "Possible_Missing_" + caseName;
            header = "# Derived Property: " + name
            + "\r\n#  Generated from: NFKD has >0 " + caseName + ", no other cases";
        }

        /**
         * @param cp code point to test
         * @return false when cp already has the target category, or (for the Lu and Ll
         *         variants) has the Other_Uppercase binary property; otherwise true
         *         exactly when getDecompCat(cp) equals the target category
         */
        public boolean hasValue(int cp) {
            if (ucdData.getCategory(cp) == val) {
                return false;
            }
            // NOTE(review): Other_Uppercase is tested for the Ll variant as well;
            // confirm whether Other_Lowercase was intended there.
            if (val != Lt && ucdData.getBinaryProperty(cp, Other_Uppercase)) {
                return false;
            }
            return getDecompCat(cp) == val;
        }
    };
    /**
     * Enumerated derived property form name + "_QuickCheck": reports the NO / MAYBE
     * quick-check status of a code point for one normalization form. Code points
     * with neither status get the empty string.
     */
    class QuickDProp extends UCDProperty {
        /** Value returned for code points that are not normalized in this form. */
        String NO;
        /** Value returned for normalized code points that the normalizer reports as trailing. */
        String MAYBE;
        /** Normalizer for the form this property describes. */
        Normalizer nfx;

        /**
         * @param i index into the enclosing instance's {@code nf} array
         */
        QuickDProp (int i) {
            //setValueType((i == NFC || i == NFKC) ? ENUMERATED_PROP : BINARY_PROP);
            setValueType(ENUMERATED_PROP);
            type = DERIVED_NORMALIZATION;
            nfx = nf[i];
            String formName = nfx.getName();
            NO = formName + "_NO";
            MAYBE = formName + "_MAYBE";
            name = formName + "_QuickCheck";
            shortName = formName + "_QC";
            boolean composing = (i == NFC || i == NFKC);
            header = "# Derived Property: " + name
            + "\r\n#  Generated from computing decomposibles"
            + (composing ? " (and characters that may compose with previous ones)" : "");
        }

        /**
         * @param cp code point to look up
         * @param style not used by this implementation
         * @return NO when cp is not normalized, MAYBE when it is normalized but
         *         trailing, otherwise the empty string
         */
        public String getValue(int cp, byte style) {
            if (!nfx.isNormalized(cp)) {
                return NO;
            }
            return nfx.isTrailing(cp) ? MAYBE : "";
        }

        /**
         * @param cp code point to look up
         * @return the value of {@code getValue(cp, LONG)}
         */
        public String getListingValue(int cp) {
            return getValue(cp, LONG);
        }

        /**
         * @param cp code point to test
         * @return true when the value for cp is a non-empty string
         */
        public boolean hasValue(int cp) {
            return getValue(cp).length() != 0;
        }
    };
    private DerivedProperty(UCD ucd) {
        ucdData = ucd;
        nfd = nf[NFD] = new Normalizer(Normalizer.NFD, ucdData.getVersion());
        nfc = nf[NFC] = new Normalizer(Normalizer.NFC, ucdData.getVersion());
        nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, ucdData.getVersion());
        nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, ucdData.getVersion());
        for (int i = ExpandsOnNFD; i <= ExpandsOnNFKC; ++i) {
            dprops[i] = new ExDProp(i-ExpandsOnNFD);
        }
        for (int i = GenNFD; i <= GenNFKC; ++i) {
            dprops[i] = new GenDProp(i-GenNFD);
        }
        for (int i = NFC_Leading; i <= NFC_Resulting; ++i) {
            dprops[i] = new NFC_Prop(i);
        }
        for (int i = NFD_UnsafeStart; i <= NFKC_UnsafeStart; ++i) {
            dprops[i] = new NF_UnsafeStartProp(i-NFD_UnsafeStart);
        }
        dprops[ID_Start] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                name = "ID_Start";
                shortName = "IDS";
                header = "# Derived Property: " + name
                    + "\r\n#  Characters that can start an identifier."
                    + "\r\n#  Generated from Lu+Ll+Lt+Lm+Lo+Nl+Other_ID_Start";
            }
            public boolean hasValue(int cp) {
                return ucdData.isIdentifierStart(cp);
            }
        };
        dprops[ID_Continue_NO_Cf] = new UCDProperty() {
            {
                name = "ID_Continue";
                type = DERIVED_CORE;
                shortName = "IDC";
                header = "# Derived Property: " + name
                    + "\r\n#  Characters that can continue an identifier."
                    + "\r\n#  Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue"
                    + "\r\n#  NOTE: Cf characters should be filtered out.";
            }
            public boolean hasValue(int cp) {
                return ucdData.isIdentifierContinue_NO_Cf(cp);
            }
        };
        StringBuffer tempBuf = new StringBuffer();
        //System.out.println("Deriving data for XID");
        // special hack for middle dot
        XID_Continue_Set.add(0x00B7);
        //System.out.println("Adding (2)" + ucdData.getCodeAndName(0x00B7));
        for (int cp = 0; cp < 0x10FFFF; ++cp) {
            // skip cases that can't matter
            if (!ucdData.isAssigned(cp)) continue;
            // find out normal status
            int status = 0;
            if (ucdData.isIdentifierStart(cp)) status = 1;
            else if (ucdData.isIdentifierContinue_NO_Cf(cp)) status = 2;
            if (status != 0 && !nfkd.isNormalized(cp)) {
                // now find out NFKD status
                // if it is <start><extend>*, then it is start
                // else if it is <extend>*, then it is extend
                // else it is nothing
                int status2 = 0;
                tempBuf.setLength(0);
                nfkd.normalize(UTF32.valueOf32(cp), tempBuf);
                for (int i = 0; i < tempBuf.length(); i += UTF32.count16(cp)) {
                    int cp2 = UTF32.char32At(tempBuf, i);
                    if (i == 0) {
                        if (ucdData.isIdentifierStart(cp2)) status2 = 1;
                        else if (ucdData.isIdentifierContinue_NO_Cf(cp2)) status2 = 2;
                        else {
                            status2 = 0;
                            break;
                        }
                    } else if (!ucdData.isIdentifierContinue_NO_Cf(cp2) && cp2 != 0xB7) {
                        status2 = 0;
                        break;
                    }
                }
                // Now see if the statuses are compatible.
                if (status != status2) {
                    //System.out.println("Need to do something with:");
                    //System.out.println("  " + status + ": " + ucdData.getCodeAndName(cp));
                    //System.out.println("  " + status2 + ": " + ucdData.getCodeAndName(tempBuf.toString()));
                    if (status2 == 0) status = 0;
                    else if (status2 > status) status = status2;
                    //System.out.println("  " + status + ": " + ucdData.getCodeAndName(cp));
                }
            }
            if (status == 1) XID_Start_Set.add(cp);
            if (status != 0) XID_Continue_Set.add(cp);
        }
        dprops[Mod_ID_Start] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                name = "XID_Start";
                shortName = "XIDS";
                header = "# Derived Property: " + name
                    + "\r\n#  ID_Start modified for closure under NFKx"
                    + "\r\n#  Modified as described in UAX #15"
                    + "\r\n#  NOTE: Does NOT remove the non-NFKx characters."
                    + "\r\n#        Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))";
            }
            public boolean hasValue(int cp) {
                return XID_Start_Set.contains(cp);
            }
        };
        dprops[Mod_ID_Continue_NO_Cf] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                name = "XID_Continue";
                shortName = "XIDC";
                header = "# Derived Property: " + name
                    + "\r\n#  Mod_ID_Continue modified for closure under NFKx"
                    + "\r\n#  Modified as described in UAX #15"
                    + "\r\n#  NOTE: Cf characters should be filtered out."
                    + "\r\n#  NOTE: Does NOT remove the non-NFKx characters."
                    + "\r\n#        Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))";
            }
            public boolean hasValue(int cp) {
                return XID_Continue_Set.contains(cp);
            }
        };
        dprops[PropMath] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                name = "Math";
                shortName = name;
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from: Sm + Other_Math";
            }
            public boolean hasValue(int cp) {
                byte cat = ucdData.getCategory(cp);
                if (cat == Sm
                || ucdData.getBinaryProperty(cp,Math_Property)) return true;
                return false;
            }
        };
        dprops[PropAlphabetic] = new UCDProperty() {
            {
                type = DERIVED_CORE;
               name = "Alphabetic";
                shortName = "Alpha";
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from: Lu+Ll+Lt+Lm+Lo+Nl + Other_Alphabetic";
            }
            public boolean hasValue(int cp) {
                byte cat = ucdData.getCategory(cp);
                if (cat == Lu || cat == Ll || cat == Lt || cat == Lm || cat == Lo || cat == Nl
                || ucdData.getBinaryProperty(cp, Other_Alphabetic)) return true;
                return false;
            }
        };
        dprops[PropLowercase] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                name = "Lowercase";
                shortName = "Lower";
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from: Ll + Other_Lowercase";
            }
            public boolean hasValue(int cp) {
                byte cat = ucdData.getCategory(cp);
                if (cat == Ll
                || ucdData.getBinaryProperty(cp, Other_Lowercase)) return true;
                return false;
            }
        };
        dprops[PropUppercase] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                name = "Uppercase";
                shortName = "Upper";
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from: Lu + Other_Uppercase";
            }
            public boolean hasValue(int cp) {
                byte cat = ucdData.getCategory(cp);
                if (cat == Lu
                || ucdData.getBinaryProperty(cp, Other_Uppercase)) return true;
                return false;
            }
        };
        for (int i = Missing_Uppercase; i <= Missing_Mixedcase; ++i) {
            dprops[i] = new CaseDProp(i);
        }
 /*
 (3) Singleton Decompositions: characters that  can be derived from the UnicodeData file by
 including all characters whose canonical decomposition consists of a single character.
 (4) Non-Starter Decompositions: characters that  can be derived from the UnicodeData
 file by including all characters whose canonical decomposition consists of a sequence
 of characters, the first of which has a non-zero combining class.
 */
        dprops[FullCompExclusion] = new UCDProperty() {
            {
                type = DERIVED_NORMALIZATION;
                name = "Full_Composition_Exclusion";
                shortName = "Comp_Ex";
                defaultValueStyle = defaultPropertyStyle = SHORT;
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from: Composition Exclusions + Singletons + Non-Starter Decompositions";
            }
            public boolean hasValue(int cp) {
                if (!ucdData.isRepresented(cp)) return false;
                byte dtype = ucdData.getDecompositionType(cp);
                if (dtype != CANONICAL) return false;
                if (isCompEx(cp)) return true;
                return false;
            }
 		    /*public String getListingValue(int cp) {
    		    return "Comp_Ex";
    	    }*/
            /*
 			public String getListingValue(int cp) {
        		if (getValueType() != BINARY) return getValue(cp, SHORT);
        		return getProperty(SHORT);
 			}
 			*/
        };
        dprops[FullCompInclusion] = new UCDProperty() {
            {
                isStandard = false;
                type = DERIVED_NORMALIZATION;
                name = "Full_Composition_Inclusion";
                shortName = "Comp_In";
                defaultValueStyle = defaultPropertyStyle = SHORT;
                header = "# Derived Property: " + name
                    + ": Full Composition Inclusion"
                    + "\r\n#  characters with Canonical Decompositions MINUS Full Composition Exclusion";
            }
            /**
             * Tests membership in Full_Composition_Inclusion, which the property
             * header defines as the characters with canonical decompositions MINUS
             * the Full Composition Exclusion set.
             *
             * @param cp code point to test
             * @return true when cp is represented, has a canonical decomposition,
             *         and is not reported by isCompEx
             */
            public boolean hasValue(int cp) {
                if (!ucdData.isRepresented(cp)) return false;
                byte dtype = ucdData.getDecompositionType(cp);
                if (dtype != CANONICAL) return false;
                // Inclusion is the complement of exclusion within the canonical
                // decomposables; the previous test duplicated the exclusion property.
                return !isCompEx(cp);
            }
        };
        dprops[FC_NFKC_Closure] = new UCDProperty() {
            {
                type = DERIVED_NORMALIZATION;
                setValueType(STRING_PROP);
                name = "FC_NFKC_Closure";
                shortName = "FC_NFKC";
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b));"
                    + "\r\n#  Then if (c != b) add the mapping from a to c to the set of"
                    + "\r\n#  mappings that constitute the FC_NFKC_Closure list"
                    + "\r\n#  Uses the full case folding from CaseFolding.txt, without the T option."
                    ;
            }
            public String getValue(int cp, byte style) {
                if (!ucdData.isRepresented(cp)) return "";
                String b = nfkc.normalize(fold(cp));
                String c = nfkc.normalize(fold(b));
                if (c.equals(b)) return "";
                return "FNC; " + Utility.hex(c);
            } // default
            public boolean hasValue(int cp) { return getValue(cp).length() != 0; }
        };
        dprops[FC_NFC_Closure] = new UCDProperty() {
            {
                type = DERIVED_NORMALIZATION;
                isStandard = false;
                name = "FC_NFC_Closure";
                setValueType(STRING_PROP);
                shortName = "FC_NFC";
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from computing: b = NFC(Fold(a)); c = NFC(Fold(b));"
                    + "\r\n#  Then if (c != b) add the mapping from a to c to the set of"
                    + "\r\n#  mappings that constitute the FC_NFC_Closure list"
                    + "\r\n#  Uses the full case folding from CaseFolding.txt, without the T option."
                    ;
            }
            public String getValue(int cp, byte style) { 
                if (!ucdData.isRepresented(cp)) return "";
                String b = nfc.normalize(fold(cp));
                String c = nfc.normalize(fold(b));
                if (c.equals(b)) return "";
                return "FN; " + Utility.hex(c);
            } // default
            public boolean hasValue(int cp) { return getValue(cp).length() != 0; }
        };
        for (int i = QuickNFD; i <= QuickNFKC; ++i) {
            dprops[i] = new QuickDProp(i - QuickNFD);
        }        
        dprops[DefaultIgnorable] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                name = "Default_Ignorable_Code_Point";
                hasUnassigned = true;
                shortName = "DI";
                header = null;
            }
            public String getHeader() {
                if (ucdData.getCompositeVersion() > 0x040000) return "# Derived Property: " + name
                   + "\r\n#  Generated from (Other_Default_Ignorable_Code_Point + Variation_Selector"
                   + "\r\n#    + Noncharacter_Code_Point + Cf + Cc + Cs) - White_Space"
                   + "\r\n#    -  U+FFF9..U+FFFB// INTERLINEAR ANNOTATION characters";
                   //+ "\r\n#    - U+0600..U+0603 - U+06DD - U+070F"
               return  "# Derived Property: " + name
                + "\r\n#  Generated from (Other_Default_Ignorable_Code_Point + Cf + Cc + Cs) - White_Space";
            }
            public boolean hasValue(int cp) {
                if (ucdData.getBinaryProperty(cp, White_space)) return false;
                if (ucdData.getBinaryProperty(cp, Other_Default_Ignorable_Code_Point)) return true;
                if (ucdData.getCompositeVersion() > 0x040000 && cp >= 0xFFF9 && cp <= 0xFFFB) return false;
                byte cat = ucdData.getCategory(cp);
                if (cat == Cf || cat == Cs || cat == Cc) return true;
                if (ucdData.getCompositeVersion() <= 0x040000) return false;
                //if (cp >= 0xFFF9 && cp <= 0xFFFB) return false;
            	//if (0x2060 <= cp && cp <= 0x206F || 0xFFF0 <= cp && cp <= 0xFFFB || 0xE0000 <= cp && cp <= 0xE0FFF) return true;
            	//if (0x0600 <= cp && cp <= 0x0603 || 0x06DD == cp || 0x070F == cp) return false;
                if (ucdData.getBinaryProperty(cp, Variation_Selector)) return true;
                if (ucdData.getBinaryProperty(cp, Noncharacter_Code_Point)) return true;
                return false;
            }
        };
        dprops[Case_Sensitive] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                isStandard = false;
                name = "Case_Sensitive";
                hasUnassigned = false;
                shortName = "CS";
                header = header = "# Derived Property: " + name
                    + "\r\n#  Generated from all characters that are either on the right or left side of a case mapping";
            }
            UnicodeSet case_sensitive = null;
            UnicodeSet tempSet = new UnicodeSet();
            UnicodeSet cased = null;
            PrintWriter log;
            private void addCase(String cps, byte c1, byte c2) {
                String temp = ucdData.getCase(cps, c1, c2);
                if (temp.equals(cps)) return;
                //temp = nfc.normalize(temp);
                //if (temp.equals(cps)) return;
                tempSet.clear();
                tempSet.addAll(cps);
                tempSet.addAll(temp);
                if (!case_sensitive.containsAll(tempSet)) {
                    tempSet.removeAll(case_sensitive);
                    if (!cased.containsAll(tempSet)) {
                        log.println();
                        log.println("Adding " + tempSet + " because of: ");
                        log.println("\t" + ucdData.getCodeAndName(cps));
                        log.println("=>\t" + ucdData.getCodeAndName(temp));
                    }
                    case_sensitive.addAll(tempSet);
                }
            }
            /**
             * Tests whether cp is in the Case_Sensitive set. The set is built lazily
             * on the first call by running every assigned code point through all
             * full and simple case mappings (see addCase) and is then reused. A
             * report is written to "Case_Sensitive_Log.txt" while building.
             *
             * If building fails, the log is still closed and the partially built
             * set is discarded, so that a later call rebuilds it instead of
             * silently answering from incomplete data.
             *
             * @param cp code point to test
             * @return true when cp is in the case-sensitive set
             * @throws ChainException wrapping any exception raised while building the set
             */
            public boolean hasValue(int cp) {
                if (case_sensitive == null) {
                    try {
                        log = Utility.openPrintWriter("Case_Sensitive_Log.txt", Utility.UTF8_UNIX);
                        System.out.println("Building Case-Sensitive cache");
                        case_sensitive = new UnicodeSet();
                        // NOTE(review): addAll is applied to the set returned by getSet();
                        // confirm that getSet() hands out a copy and not the property's own set.
                        cased = DerivedProperty.make(PropLowercase, ucdData).getSet()
                            .addAll(DerivedProperty.make(PropUppercase, ucdData).getSet())
                            .addAll(UnifiedBinaryProperty.make(CATEGORY | Lt).getSet());
                        for (int c = 0; c < 0x10FFFF; ++c) {
                            Utility.dot(c);
                            // skip cases that can't matter
                            if (!ucdData.isAssigned(c)) continue;
                            String cps = UTF16.valueOf(c);
                            addCase(cps, FULL, LOWER);
                            addCase(cps, FULL, UPPER);
                            addCase(cps, FULL, TITLE);
                            addCase(cps, FULL, FOLD);
                            addCase(cps, SIMPLE, LOWER);
                            addCase(cps, SIMPLE, UPPER);
                            addCase(cps, SIMPLE, TITLE);
                            addCase(cps, SIMPLE, FOLD);
                        }
                        Utility.fixDot();
                        UnicodeSet temp;
                        log.println("Cased, but not Case_Sensitive");
                        temp = new UnicodeSet().addAll(cased).removeAll(case_sensitive);
                        Utility.showSetNames(log, "", temp, false, false, ucdData);
                        log.println("Case_Sensitive, but not Cased");
                        temp = new UnicodeSet().addAll(case_sensitive).removeAll(cased);
                        Utility.showSetNames(log, "", temp, false, false, ucdData);
                        log.println("Both Case_Sensitive, and Cased");
                        temp = new UnicodeSet().addAll(case_sensitive).retainAll(cased);
                        log.println(temp);
                        System.out.println("Done Building Case-Sensitive cache");
                    } catch (Exception e) {
                        // Drop the incomplete set so the next call starts over.
                        case_sensitive = null;
                        throw new ChainException("internal error", null, e);
                    } finally {
                        // Close the log on success and on failure alike.
                        if (log != null) {
                            log.close();
                        }
                    }
                }
                return case_sensitive.contains(cp);  
            }
        };
        // Other_Case_Ignorable: a hand-maintained, non-standard binary property.
        dprops[Other_Case_Ignorable] = new UCDProperty() {
            {
                name = "Other_Case_Ignorable";
                shortName = "OCI";
                isStandard = false;
                header = "# Binary Property";
            }

            /**
             * True for exactly three code points:
             *   0027 APOSTROPHE
             *   00AD SOFT HYPHEN
             *   2019 RIGHT SINGLE QUOTATION MARK
             * The hyphens 002D, 2010 and 2011 are not matched.
             */
            public boolean hasValue(int cp) {
                return cp == 0x27 || cp == 0x2019 || cp == 0xAD;
            }
        };
        // Derived soft-dotted property ("DSoft_Dotted"), stored in the Type_i slot.
        dprops[Type_i] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                isStandard = false;
                name = "DSoft_Dotted";
                shortName = "DSDot";
                // name has to be assigned first: it is embedded in the header text.
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from: all characters whose canonical decompositions end with a combining character sequence that"
                    + "\r\n# - starts with i or j"
                    + "\r\n# - has no combining marks above"
                    + "\r\n# - has no combining marks with zero canonical combining class"
                ;
            }

            /**
             * A code point has the property when it is itself one of the base
             * soft-dotted letters, or when its NFD form contains no mark of
             * combining class 230 and every class-0 element in it is a base
             * soft-dotted letter (with at least one such element present).
             */
            public boolean hasValue(int cp) {
                if (hasSoftDot(cp)) {
                    return true;
                }
                // Nothing decomposes, so there is nothing further to inspect.
                if (nfkd.isNormalized(cp)) {
                    return false;
                }
                String nfdForm = nfd.normalize(cp);
                boolean sawSoftDottedBase = false;
                // Walk the decomposition from its last code unit back to the first.
                int index = nfdForm.length();
                while (--index >= 0) {
                    int element = UTF16.charAt(nfdForm, index);
                    int combiningClass = ucdData.getCombiningClass(element);
                    if (combiningClass == 230) {
                        return false;
                    }
                    if (combiningClass == 0) {
                        if (!hasSoftDot(element)) {
                            return false;
                        }
                        sawSoftDottedBase = true;
                    }
                }
                return sawSoftDottedBase;
            }

            /** The base soft-dotted letters: i, j, U+0268, U+0456, U+0458. */
            boolean hasSoftDot(int ch) {
                switch (ch) {
                    case 'i':
                    case 'j':
                    case 0x0268:
                    case 0x0456:
                    case 0x0458:
                        return true;
                    default:
                        return false;
                }
            }
        };
        // Case_Ignorable: general categories Lm, Cf, Mn, Me plus Other_Case_Ignorable.
        dprops[Case_Ignorable] = new UCDProperty() {
            {
                name = "Case_Ignorable";
                isStandard = false;
                shortName = "CI";
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from: Other_Case_Ignorable + Lm + Mn + Me + Cf";
            }

            /** True when the general category is Lm, Cf, Mn or Me, or Other_Case_Ignorable holds. */
            public boolean hasValue(int cp) {
                byte gc = ucdData.getCategory(cp);
                boolean ignorableCategory = gc == Lm || gc == Cf || gc == Mn || gc == Me;
                return ignorableCategory || dprops[Other_Case_Ignorable].hasValue(cp);
            }
        };
 /*
        GraphemeExtend = 27,
        GraphemeBase = 28,
 # GraphemeExtend := Me + Mn + Mc + Other_GraphemeExtend - GraphemeLink
 # GraphemeBase := 
 */
        // Grapheme_Extend: Me + Mn + Other_Grapheme_Extend.
        dprops[GraphemeExtend] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                name = "Grapheme_Extend";
                shortName = "Gr_Ext";
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from: Me + Mn + Other_Grapheme_Extend"
                    + "\r\n#  Note: depending on an application's interpretation of Co (private use),"
                    + "\r\n#  they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither."
                    ;
            }

            /**
             * True for general category Me or Mn, or when Other_GraphemeExtend is set.
             * Mc is not included, and no exclusion is applied for U+034F or GraphemeLink.
             */
            public boolean hasValue(int cp) {
                byte gc = ucdData.getCategory(cp);
                return gc == Me
                    || gc == Mn
                    || ucdData.getBinaryProperty(cp, Other_GraphemeExtend);
            }
        };
        // Grapheme_Base: everything except Cc, Cf, Cs, Co, Cn, Zl, Zp and Grapheme_Extend.
        dprops[GraphemeBase] = new UCDProperty() {
            {
                type = DERIVED_CORE;
                name = "Grapheme_Base";
                shortName = "Gr_Base";
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend"
                    + "\r\n#  Note: depending on an application's interpretation of Co (private use),"
                    + "\r\n#  they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither."
                    ;
            }

            /**
             * False for the excluded general categories and for anything that is
             * Grapheme_Extend; true otherwise. No special case for U+034F or GraphemeLink.
             */
            public boolean hasValue(int cp) {
                byte gc = ucdData.getCategory(cp);
                boolean excludedCategory = gc == Cc || gc == Cf || gc == Cs || gc == Co
                    || gc == Cn || gc == Zl || gc == Zp;
                return !excludedCategory && !dprops[GraphemeExtend].hasValue(cp);
            }
        };
        for (int i = 0; i < dprops.length; ++i) {
            UCDProperty up = dprops[i];
            if (up == null) continue;
            if (up.getValueType() != BINARY_PROP) continue;
            up.setValue(NUMBER, "1");
            up.setValue(SHORT, "T");
            up.setValue(LONG, "True");
        }
    }
    /**
     * Computes a case-oriented category for a code point, looking through its
     * compatibility decomposition when the code point itself is not decisive.
     *
     * Order of checks:
     *  1. Lu, or Other_Uppercase set                      -> Lu
     *  2. Ll, or Other_Lowercase set                      -> Ll
     *  3. Lt, Lo, Lm or Nl                                -> that category unchanged
     *  4. already NFKD-normalized                         -> Lo
     *  5. otherwise each code point of the NFKD form is examined:
     *     only lowercase seen -> Ll, only uppercase seen -> Lu,
     *     any other mixture of lower/upper/title -> Lt,
     *     nothing cased seen -> the original category.
     *
     * @param cp code point to classify
     * @return one of the general-category constants as described above
     */
    byte getDecompCat(int cp) {
        byte cat = ucdData.getCategory(cp);
        if (cat == Lu
            || ucdData.getBinaryProperty(cp, Other_Uppercase)) return Lu;
        if (cat == Ll
            || ucdData.getBinaryProperty(cp, Other_Lowercase)) return Ll;
        if (cat == Lt || cat == Lo || cat == Lm || cat == Nl) return cat;
        if (nf[NFKD].isNormalized(cp)) return Lo;

        String norm = nf[NFKD].normalize(cp);
        int cp2;
        boolean gotUpper = false;
        boolean gotLower = false;
        boolean gotTitle = false;
        for (int i = 0; i < norm.length(); i += UTF32.count16(cp2)) {
            cp2 = UTF32.char32At(norm, i);
            byte catx = ucdData.getCategory(cp2);
            // Query the properties of the decomposition element (cp2). Asking about
            // cp here would always yield false, because both properties were already
            // ruled out for cp by the early returns above.
            boolean upx = ucdData.getBinaryProperty(cp2, Other_Uppercase);
            boolean lowx = ucdData.getBinaryProperty(cp2, Other_Lowercase);
            // U+0345 is counted as lowercase explicitly.
            if (catx == Ll || lowx || cp2 == 0x345) gotLower = true;
            if (catx == Lu || upx) gotUpper = true;
            if (catx == Lt) gotTitle = true;
        }
        if (gotLower && !gotUpper && !gotTitle) return Ll;
        if (!gotLower && gotUpper && !gotTitle) return Lu;
        if (gotLower || gotUpper || gotTitle) return Lt;
        return cat;
    }
    /**
     * Reports whether a code point is excluded from composition: it carries the
     * CompositionExclusion property, its decomposition mapping is a single code
     * point, or the first code point of that mapping has a non-zero combining class.
     */
    boolean isCompEx(int cp) {
        boolean excluded = ucdData.getBinaryProperty(cp, CompositionExclusion);
        if (!excluded) {
            String mapping = ucdData.getDecompositionMapping(cp);
            excluded = UTF32.length32(mapping) == 1
                || ucdData.getCombiningClass(UTF32.char32At(mapping, 0)) != 0;
        }
        return excluded;
    }
    /** Full case folding of a single code point, as supplied by the UCD data. */
    String fold(int cp) {
        String folded = ucdData.getCase(cp, FULL, FOLD);
        return folded;
    }
    /** Full case folding of a string, as supplied by the UCD data. */
    String fold(String s) {
        String folded = ucdData.getCase(s, FULL, FOLD);
        return folded;
    }
    /**
     * Ad-hoc console dump: for each code point from 0xA0 up to (but not
     * including) 0xFF, prints its code and name followed by every non-empty
     * derived property value, one per tab-indented line.
     */
    public static void test() {
        /*
        DerivedProperty dprop = new DerivedProperty(Default.ucd);
        for (int j = 0; j < LIMIT; ++j) {
            System.out.println();
            System.out.println(j + "\t" + dprop.getName(j));
            System.out.println(dprop.getHeader(j));
        }
        */
        int cp = 0xA0;
        while (cp < 0xFF) {
            System.out.println();
            System.out.println(Default.ucd().getCodeAndName(cp));
            for (int index = 0; index < DERIVED_PROPERTY_LIMIT; ++index) {
                String value = make(index, Default.ucd()).getValue(cp);
                if (value.length() == 0) {
                    continue;
                }
                System.out.println("\t" + value);
            }
            ++cp;
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
@ -1,118 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
 * $Date: 2006/06/09 21:21:20 $
 * $Revision: 1.13 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.*;
 import java.util.*;
 import com.ibm.text.utility.*;
 /**
  * PropertyLister that lists the code points of one derived property,
  * obtained from DerivedProperty.make.
  */
 final class DerivedPropertyLister extends PropertyLister {
    static final boolean BRIDGE = false;

    /** The derived property being listed. */
    private UCDProperty uprop;

    /** Value reported by minPropertyWidth(). */
    int width;

    /** True when the property's value type is below BINARY_PROP, i.e. values are compared as strings. */
    boolean varies;

    /** Most recent value seen by status() for a varying property. */
    String last;

    /**
     * @param ucd      UCD instance the property is computed from
     * @param propMask derived property identifier passed to DerivedProperty.make
     * @param output   writer that receives the listing
     */
    public DerivedPropertyLister(UCD ucd, int propMask, PrintWriter output) {
        this.output = output;
        this.ucdData = ucd;
        uprop = DerivedProperty.make(propMask, ucd);
        varies = uprop.getValueType() < BINARY_PROP;
        width = super.minPropertyWidth();

        // A few properties get their own break behavior and/or column width.
        if (propMask == DerivedProperty.GenNFD
                || propMask == DerivedProperty.GenNFC
                || propMask == DerivedProperty.GenNFKD
                || propMask == DerivedProperty.GenNFKC) {
            alwaysBreaks = true;
        } else if (propMask == DerivedProperty.FC_NFKC_Closure) {
            alwaysBreaks = true;
            width = 21;
        } else if (propMask == DerivedProperty.QuickNFC
                || propMask == DerivedProperty.QuickNFKC) {
            width = 11;
        }
    }

    /** Header text of the underlying property. */
    public String headerString() {
        return uprop.getHeader();
    }

    /** Listing value of the underlying property for the given code point. */
    public String valueName(int cp) {
        return uprop.getListingValue(cp);
    }

    public int minPropertyWidth() {
        return width;
    }

    /**
     * Decides how a code point participates in the listing.
     * Unassigned code points are excluded unless the property covers them.
     * For a non-varying property, membership alone decides INCLUDE/EXCLUDE.
     * For a varying property, an empty value is excluded, a value equal to the
     * previously remembered one is INCLUDE, and a new value is remembered and
     * reported as BREAK.
     */
    public byte status(int cp) {
        boolean skipUnassigned = !uprop.hasUnassigned() && !ucdData.isAssigned(cp);
        if (skipUnassigned) {
            return EXCLUDE;
        }
        if (varies) {
            String current = uprop.getValue(cp);
            if (current.length() == 0) {
                return EXCLUDE;
            }
            if (current.equals(last)) {
                return INCLUDE;
            }
            last = current;
            return BREAK;
        }
        if (uprop.hasValue(cp)) {
            return INCLUDE;
        }
        return EXCLUDE;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java
@ -1,158 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java,v $
 * $Date: 2004/02/06 18:30:22 $
 * $Revision: 1.9 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import com.ibm.icu.text.UnicodeSet;
 import java.io.*;
 /**
  * PropertyLister that reports differences between two UCD versions.
  *
  * Without a property (NOPROPERTY) it lists code points allocated in the new
  * version that are not allocated in the old one (or all allocated code points
  * when there is no old version). With a property it lists code points,
  * allocated in both versions, whose derived property value differs.
  * Every included code point is also collected in the set returned by getSet().
  */
 class DiffPropertyLister extends PropertyLister {
    private static final int NOPROPERTY = -1;

    private UCD oldUCD;
    private UnicodeSet set = new UnicodeSet();

    /** Derived property in the new version; null when listing allocations only. */
    UCDProperty newProp = null;
    /** Derived property in the old version; null without a property or without an old version. */
    UCDProperty oldProp = null;
    String value = "";

    /**
     * @param oldUCDName version name of the older UCD, or null for none
     * @param newUCDName version name of the newer UCD
     * @param output     writer that receives the listing
     * @param property   derived property identifier, or NOPROPERTY (-1)
     */
    public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output, int property) {
        this.output = output;
        this.ucdData = UCD.make(newUCDName);
        if (property != NOPROPERTY) newProp = DerivedProperty.make(property, ucdData);

        if (oldUCDName != null) {
            this.oldUCD = UCD.make(oldUCDName);
            if (property != NOPROPERTY) oldProp = DerivedProperty.make(property, oldUCD);
        }
        breakByCategory = property != NOPROPERTY;
        useKenName = false;
        usePropertyComment = false;
    }

    /** Allocation-only variant: same as passing NOPROPERTY. */
    public DiffPropertyLister(String oldUCDName, String newUCDName, PrintWriter output) {
        this(oldUCDName, newUCDName, output, NOPROPERTY);
    }

    /** Code points that status() has reported as INCLUDE so far. */
    public UnicodeSet getSet() {
        return set;
    }

    public String valueName(int cp) {
        return major_minor_only(ucdData.getVersion());
    }

    /**
     * Prefixes the inherited comment with the old version's modified category
     * ("old/new") when an old version exists and listing is broken by category.
     */
    public String optionalComment(int cp) {
        String normal = super.optionalComment(cp);
        if (oldUCD != null && breakByCategory) {
            // breakByCategory is known to be true here, so the mask is always CASED_LETTER_MASK.
            byte modCat = oldUCD.getModCat(cp, CASED_LETTER_MASK);
            normal = oldUCD.getModCatID_fromIndex(modCat) + "/" + normal;
        }
        return normal;
    }

    byte getModCat(int cp) {
        byte result = ucdData.getModCat(cp, breakByCategory ? CASED_LETTER_MASK : -1);
        //System.out.println(breakByCategory + ", " + ucdData.getModCatID_fromIndex(result));
        return result;
    }

    /**
     * INCLUDE when the code point belongs in the difference listing, EXCLUDE otherwise.
     */
    public byte status(int cp) {
        if (newProp == null) {
            // Allocation mode: newly allocated relative to the old version (or simply allocated).
            if (ucdData.isAllocated(cp) && (oldUCD == null || !oldUCD.isAllocated(cp))) {
                set.add(cp);
                return INCLUDE;
            } else {
                return EXCLUDE;
            }
        }

        // just look at property differences among allocated characters
        // NOTE(review): this path dereferences oldUCD/oldProp, which are null when a
        // property is given without an old version name; confirm callers never do that.
        if (!ucdData.isAllocated(cp)) return EXCLUDE;
        if (!oldUCD.isAllocated(cp)) return EXCLUDE;
        String val = newProp.getValue(cp);
        String oldVal = oldProp.getValue(cp);
        if (!oldVal.equals(val)) {
            set.add(cp);
            return INCLUDE;
        }
        return EXCLUDE;
    }

    public String headerString() {
        String result;
        if (oldUCD != null) {
            result = "# Differences between "
                + major_minor_only(ucdData.getVersion())
                + " and "
                + major_minor_only(oldUCD.getVersion());
        } else {
            result = "# Designated as of "
                + major_minor_only(ucdData.getVersion())
                + " [excluding removed Hangul Syllables]";
        }
        //System.out.println("hs: " + result);
        return result;
    }

    /**
     * Strips the last dot-separated component from a version string
     * (e.g. "3.1.0" becomes "3.1"). The string is returned unchanged when a
     * property is being compared, or when it contains no '.' at all
     * (previously the latter case threw StringIndexOutOfBoundsException).
     */
    private String major_minor_only(String s) {
        if (newProp != null) return s;
        int lastDot = s.lastIndexOf('.');
        if (lastDot < 0) return s;
        return s.substring(0, lastDot);
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
@ -1,624 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
 * $Date: 2006/04/05 22:12:45 $
 * $Revision: 1.18 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.text.utility.*;
 public class GenerateCaseFolding implements UCD_Types {
    // When true, getClosure() and add() trace their progress on System.err.
    public static boolean DEBUG = false;
    // When true, drawLine() adds a "[Diff ...]" comment to mappings that differ from the full lowercase.
    public static boolean COMMENT_DIFFS = false; // ON if we want a comment on mappings != lowercase
    // When true, goodness() favors shorter strings (32 - length instead of length), and
    // makeCaseFold() emits a single "C" line when the full folding is one code point.
    public static boolean PICK_SHORT = false; // picks short value for SIMPLE if in FULL, changes weighting
    // Passed to getCaseFolding()/getClosure() as nfClose: when true, each equivalence class is
    // also closed under the NFD, NFC, NFKD and NFKC forms of its members.
    public static boolean NF_CLOSURE = false;
    static final int CHECK_CHAR = 0x130; // for debugging, change to actual character, otherwise -1
    // PICK_SHORT & NF_CLOSURE = false for old style
    // (makeCaseFold(normalized) sets both flags to its argument.)
    /*public static void main(String[] args) throws java.io.IOException {
        makeCaseFold(arg[0]);
        //getAge();
    }
    */
    // Diagnostic log; opened and closed by makeCaseFold(), written to by getCaseFolding().
    static PrintWriter log;
    /**
     * Generates the case-folding data file ("CaseFolding", or
     * "CaseFolding-Normalized" when {@code normalized} is true) under
     * "DerivedData/", together with a "CaseFoldingLog" diagnostic log.
     *
     * Four mappings are computed with getCaseFolding(): full and simple, each
     * with the default condition "" and with the "tr" condition. For every
     * code point recorded in charsUsed one or more lines are written with
     * drawLine(), using these type codes:
     *   C - full and simple agree (or, with PICK_SHORT, the full result is one code point)
     *   F - full mapping, S - simple mapping (when they do not agree)
     *   T - "tr" full mapping differing from the default full mapping
     *   t - "tr" simple mapping differing from the default simple mapping
     * Inside the "C" branch, U+0049 and U+0130 get hard-coded lines and U+0131 gets none.
     *
     * The log writer is closed even when generation fails, so that whatever
     * diagnostics were produced are not lost.
     *
     * @param normalized value assigned to both PICK_SHORT and NF_CLOSURE
     * @throws java.io.IOException if the log or data file cannot be written
     */
    public static void makeCaseFold(boolean normalized) throws java.io.IOException {
        PICK_SHORT = NF_CLOSURE = normalized;

        log = Utility.openPrintWriter("CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
        try {
            System.out.println("Writing Log: " + "CaseFoldingLog" + UnicodeDataFile.getFileSuffix(true));

            System.out.println("Making Full Data");
            Map fullData = getCaseFolding(true, NF_CLOSURE, "");
            Utility.fixDot();

            System.out.println("Making Simple Data");
            Map simpleData = getCaseFolding(false, NF_CLOSURE, "");

            System.out.println("Making Turkish Full Data");
            Map fullDataTurkish = getCaseFolding(true, NF_CLOSURE, "tr");
            Utility.fixDot();

            // This message used to repeat "Making Simple Data", which mislabelled the Turkish pass.
            System.out.println("Making Turkish Simple Data");
            Map simpleDataTurkish = getCaseFolding(false, NF_CLOSURE, "tr");

            // write the data
            Utility.fixDot();
            System.out.println("Writing");
            String filename = "CaseFolding";
            if (normalized) filename += "-Normalized";
            String directory = "DerivedData/";
            UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(directory, filename);
            PrintWriter out = fc.out;

            for (int ch = 0; ch <= 0x10FFFF; ++ch) {
                Utility.dot(ch);

                // Only code points that getCaseFolding() mapped to a representative.
                if (!charsUsed.get(ch)) continue;

                String rFull = (String)fullData.get(UTF32.valueOf32(ch));
                String rSimple = (String)simpleData.get(UTF32.valueOf32(ch));
                String rFullTurkish = (String)fullDataTurkish.get(UTF32.valueOf32(ch));
                String rSimpleTurkish = (String)simpleDataTurkish.get(UTF32.valueOf32(ch));
                if (rFull == null && rSimple == null && rFullTurkish == null && rSimpleTurkish == null) continue;

                if (rFull != null && rFull.equals(rSimple)
                  || (PICK_SHORT && UTF16.countCodePoint(rFull) == 1)) {
                    String type = "C";
                    if (ch == 0x49) {
                        drawLine(out, ch, "C", "i");
                        drawLine(out, ch, "T", "\u0131");
                    } else if (ch == 0x130) {
                        drawLine(out, ch, "F", "i\u0307");
                        drawLine(out, ch, "T", "i");
                    } else if (ch == 0x131) {
                        // do nothing
                        //drawLine(out, ch, "I", "i");
                    } else {
                        drawLine(out, ch, type, rFull);
                    }
                } else {
                    if (rFull != null) {
                        drawLine(out, ch, "F", rFull);
                    }
                    if (rSimple != null) {
                        drawLine(out, ch, "S", rSimple);
                    }
                }
                if (rFullTurkish != null && !rFullTurkish.equals(rFull)) {
                    drawLine(out, ch, "T", rFullTurkish);
                }
                if (rSimpleTurkish != null && !rSimpleTurkish.equals(rSimple)) {
                    drawLine(out, ch, "t", rSimpleTurkish);
                }
            }
            fc.close();
        } finally {
            log.close();
        }
    }
 /* Goal is following (with no entries for 0131 or 0069)
 0049; C; 0069; # LATIN CAPITAL LETTER I
 0049; T; 0131; # LATIN CAPITAL LETTER I
 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
 */
    /**
     * Writes one data line of the form
     * "<code>; <type>; <mapping in hex>; # <optional diff note><character name>".
     *
     * With COMMENT_DIFFS on, a "[Diff ...]" note is added when the full
     * lowercase of ch is not equal to the mapping being written.
     */
    static void drawLine(PrintWriter out, int ch, String type, String result) {
        String note = "";
        if (COMMENT_DIFFS) {
            String fullLower = Default.ucd().getCase(UTF16.valueOf(ch), FULL, LOWER);
            if (!fullLower.equals(result)) {
                // NOTE(review): upper is never read, and the second lookup repeats the
                // first one exactly, so the "PROBLEM WITH" branch looks unreachable
                // unless getCase is non-deterministic. Kept as in the original.
                String upper = Default.ucd().getCase(UTF16.valueOf(ch), FULL, UPPER);
                String fullLowerAgain = Default.ucd().getCase(UTF16.valueOf(ch), FULL, LOWER);
                if (!fullLower.equals(fullLowerAgain)) {
                    Utility.fixDot();
                    System.out.println("PROBLEM WITH: " + Default.ucd().getCodeAndName(ch));
                    note = "[DIFF " + Utility.hex(fullLower, " ") + ", " + Utility.hex(fullLowerAgain, " ") + "] ";
                } else {
                    note = "[Diff " + Utility.hex(fullLower, " ") + "] ";
                }
            }
        }

        String line = Utility.hex(ch)
            + "; " + type
            + "; " + Utility.hex(result, " ")
            + "; # " + note + Default.ucd().getName(ch);
        out.println(line);
    }
    // Debugging probe: getCaseFolding() turns on verbose console output (the `show` flag)
    // for any equivalence class that contains this code point's string.
    static int probeCh = 0x01f0;
    static String shower = UTF16.valueOf(probeCh);
    /**
     * Builds a case-folding map: for every case equivalence class, picks the
     * "best" member (highest goodness()) as representative and maps each other
     * single-code-point member of the class to it.
     *
     * Side effects: sets the static `show` flag per class, sets a bit in
     * `charsUsed` for every mapped code point, and writes diagnostics to `log`
     * (which must already be open).
     *
     * @param full      passed through to getClosure() and goodness() (full vs. simple case mappings)
     * @param nfClose   passed through to getClosure() (also close classes under normalization)
     * @param condition passed through to getClosure() and goodness() ("" and "tr" are used by makeCaseFold)
     * @return map from a single-code-point string to the representative string of its class
     */
    static Map getCaseFolding(boolean full, boolean nfClose, String condition) throws java.io.IOException {
        Map data = new TreeMap();
        Map repChar = new TreeMap();
        //String option = "";
        // get the equivalence classes
        for (int ch = 0; ch <= 0x10FFFF; ++ch) {
            Utility.dot(ch);
            //if ((ch & 0x3FF) == 0) System.out.println(Utility.hex(ch));
            if (!Default.ucd().isRepresented(ch)) continue;
            getClosure(ch, data, full, nfClose, condition);
        }
        // get the representative characters
        // NOTE(review): a class shared by several keys of `data` is processed once per key.
        Iterator it = data.keySet().iterator();
        while (it.hasNext()) {
            String s = (String) it.next();
            Set set = (Set) data.get(s);
            // verbose output only for the class containing the probe character
            show = set.contains(shower);
            if (show) {
                Utility.fixDot();
                System.out.println(toString(set));
            }
        // Pick the best available representative
            // Strict '>' means the first member reaching the top score wins ties.
            String rep = null;
            int repGood = 0;
            // NOTE(review): dup records a tie with the current best but is never read.
            String dup = null;
            Iterator it2 = set.iterator();
            while (it2.hasNext()) {
                String s2 = (String)it2.next();
                int s2Good = goodness(s2, full, condition);
                if (s2Good > repGood) {
                    rep = s2;
                    repGood = s2Good;
                    dup = null;
                } else if (s2Good == repGood) {
                    dup = s2;
                }
            }
            if (rep == null) {
                // NOTE(review): execution continues with rep == null after this message.
                Utility.fixDot();
                System.err.println("No representative for: " + toString(set));
            } else if ((repGood & (NFC_FORMAT | ISLOWER)) != (NFC_FORMAT | ISLOWER)) {
                // The chosen representative lacks at least one of the two quality flags: log which.
                String message = "";
                if ((repGood & NFC_FORMAT) == 0) {
                    message += " [NOT NFC FORMAT]";
                }
                if ((repGood & ISLOWER) == 0) {
                    message += " [NOT LOWERCASE]";
                }
                Utility.fixDot();
                log.println("Non-Optimal Representative " + message);
                log.println(" Rep:\t" + Default.ucd().getCodeAndName(rep));
                log.println(" Set:\t" + toString(set,true, true));
            }
            log.println();
            log.println();
            log.println(rep + "\t#" + Default.ucd().getName(rep));
        // Add it for all the elements of the set
            it2 = set.iterator();
            while (it2.hasNext()) {
                String s2 = (String)it2.next();
                if (s2.equals(rep)) continue;
                log.println(s2 + "\t#" + Default.ucd().getName(s2));
                // Only single-code-point members get a mapping entry.
                if (UTF16.countCodePoint(s2) == 1) {
                    repChar.put(UTF32.getCodePointSubstring(s2,0), rep);
                    charsUsed.set(UTF16.charAt(s2, 0));
                }
            }
        }
        return repChar;
    }
    // Code points that getCaseFolding() mapped to a representative; read by makeCaseFold().
    static BitSet charsUsed = new BitSet();
    // Set by getCaseFolding() per equivalence class; makes goodness() print its score.
    static boolean show = false;
    // Flag bits OR-ed into the score returned by goodness().
    static final int NFC_FORMAT = 64;
    static final int ISLOWER = 128;
    /**
     * Scores a candidate representative; higher is better, null scores 0.
     *
     * The base score is the string length, or 32 minus the length when
     * PICK_SHORT is on; it is shifted left by 8 for simple (non-full) mappings.
     * ISLOWER is OR-ed in when s equals lower(upper(s)) (with PICK_SHORT,
     * equality of the NFD forms is also accepted), and NFC_FORMAT when s is
     * equal to its own NFC form.
     */
    static int goodness(String s, boolean full, String condition) {
        if (s == null) {
            return 0;
        }

        int score = PICK_SHORT ? 32 - s.length() : s.length();
        if (!full) {
            score <<= 8;
        }

        String roundTrip = lower(upper(s, full, condition), full, condition);
        boolean countsAsLower = s.equals(roundTrip)
            || (PICK_SHORT && Default.nfd().normalize(s).equals(Default.nfd().normalize(roundTrip)));
        if (countsAsLower) {
            score |= ISLOWER;
        }

        boolean alreadyNfc = s.equals(Default.nfc().normalize(s));
        if (alreadyNfc) {
            score |= NFC_FORMAT;
        }

        if (show) {
            Utility.fixDot();
            System.out.println(Utility.hex(score) + ", " + Default.ucd().getCodeAndName(s));
        }
        return score;
    }
    /*
    static HashSet temp = new HashSet();
    static void normalize(HashSet set) {
        temp.clear();
        temp.addAll(set);
        set.clear();
        Iterator it = temp.iterator();
        while (it.hasNext()) {
            String s = (String) it.next();
            String s2 = KC.normalize(s);
            set.add(s);
            data2.put(s,set);
            if (!s.equals(s2)) {
                set.add(s2);
                data2.put(s2,set);
                System.err.println("Adding " + Utility.hex(s) + " by " + Utility.hex(s2));
            }
        }
    }
    */
            /*
            String
            String lower1 = Default.ucd.getLowercase(ch);
            String lower2 = Default.ucd.toLowercase(ch,option);
            char ch2 = Default.ucd.getLowercase(Default.ucd.getUppercase(ch).charAt(0)).charAt(0);
            //String lower1 = String.valueOf(Default.ucd.getLowercase(ch));
            //String lower = Default.ucd.toLowercase(ch2,option);
            String upper = Default.ucd.toUppercase(ch2,option);
            String lowerUpper = Default.ucd.toLowercase(upper,option);
            //String title = Default.ucd.toTitlecase(ch2,option);
            //String lowerTitle = Default.ucd.toLowercase(upper,option);
            if (ch != ch2 || lowerUpper.length() != 1 || ch != lowerUpper.charAt(0)) { //
                output.println(Utility.hex(ch)
                    + "; " + (lowerUpper.equals(lower1) ? "L" : lowerUpper.equals(lower2) ? "S" : "E")
                    + "; " + Utility.hex(lowerUpper," ")
                    + ";\t#" + Default.ucd.getName(ch)
                    );
                //if (!lowerUpper.equals(lower)) {
                //    output.println("Warning1: " + Utility.hex(lower) + " " + Default.ucd.getName(lower));
                //}
                //if (!lowerUpper.equals(lowerTitle)) {
                //    output.println("Warning2: " + Utility.hex(lowerTitle) + " " + Default.ucd.getName(lowerTitle));
                //}
            }
            */
    /**
     * Builds the case equivalence class of a code point and registers it in
     * {@code data}. Nothing happens when the code point is equal to its own
     * lower-, title- and uppercase forms.
     *
     * The class starts with the code point and its three case forms and is
     * then expanded until no member yields a new string under lower/title/upper
     * (and, when {@code nfClose} is set, under NFD/NFC/NFKD/NFKC).
     * Merging with classes already present in {@code data} is done by add().
     */
    static void getClosure(int ch, Map data, boolean full, boolean nfClose, String condition) {
        String source = UTF32.valueOf32(ch);
        String asLower = lower(source, full, condition);
        String asTitle = title(source, full, condition);
        String asUpper = upper(source, full, condition);
        boolean caseless = source.equals(asLower) && source.equals(asUpper) && source.equals(asTitle);
        if (caseless) {
            return;
        }
        if (DEBUG) System.err.println("Closure for " + Utility.hex(ch));

        // Seed a fresh class with the code point and its case forms.
        Set members = new TreeSet();
        members.add(source);
        data.put(source, members);
        add(members, asLower, data);
        add(members, asUpper, data);
        add(members, asTitle, data);

        // The set cannot be modified while it is being iterated, so as soon as
        // one member contributes a new string the scan restarts from the top.
        boolean grew;
        do {
            grew = false;
            Iterator scan = members.iterator();
            while (!grew && scan.hasNext()) {
                String member = (String) scan.next();
                grew = (nfClose
                        && (add(members, Default.nfd().normalize(member), data)
                            || add(members, Default.nfc().normalize(member), data)
                            || add(members, Default.nfkd().normalize(member), data)
                            || add(members, Default.nfkc().normalize(member), data)))
                    || add(members, lower(member, full, condition), data)
                    || add(members, title(member, full, condition), data)
                    || add(members, upper(member, full, condition), data);
            }
        } while (grew);
    }
    /**
     * Lowercases via lower2() and then replaces every U+03C2 with U+03C3.
     */
    static String lower(String s, boolean full, String condition) {
        return lower2(s, full, condition).replace('\u03C2', '\u03C3'); // HACK for lower
    }
    // These functions are no longer necessary, since Default.ucd is parameterized,
    // but it's not worth changing
    /** Lowercase of s from the default UCD, using FULL or SIMPLE mappings under the given condition. */
    static String lower2(String s, boolean full, String condition) {
        if (full) {
            return Default.ucd().getCase(s, FULL, LOWER, condition);
        }
        return Default.ucd().getCase(s, SIMPLE, LOWER, condition);
    }
    /** Uppercase of s from the default UCD, using FULL or SIMPLE mappings under the given condition. */
    static String upper(String s, boolean full, String condition) {
        if (full) {
            return Default.ucd().getCase(s, FULL, UPPER, condition);
        }
        return Default.ucd().getCase(s, SIMPLE, UPPER, condition);
    }
    /** Titlecase of s from the default UCD, using FULL or SIMPLE mappings under the given condition. */
    static String title(String s, boolean full, String condition) {
        if (full) {
            return Default.ucd().getCase(s, FULL, TITLE, condition);
        }
        return Default.ucd().getCase(s, SIMPLE, TITLE, condition);
    }
    /**
     * Adds s to an equivalence class. If {@code data} already maps s to a
     * different class, that class is merged in: each of its members is
     * re-pointed at {@code set} and copied into it.
     *
     * @return false when s was already a member (nothing changes), true otherwise
     */
    static boolean add(Set set, String s, Map data) {
        if (!set.add(s)) {
            return false;
        }
        if (DEBUG) System.err.println("adding: " + toString(set));

        Set existing = (Set) data.get(s);
        boolean needsMerge = existing != null && existing != set;
        if (needsMerge) {
            // make all the items of the absorbed class point to the merged set
            for (Iterator members = existing.iterator(); members.hasNext(); ) {
                data.put(members.next(), set);
            }
            set.addAll(existing);
        }
        if (DEBUG) System.err.println("done adding: " + toString(set));
        return true;
    }
    /**
     * Formats {@code set} on a single line with each member shown as hex;
     * shorthand for {@code toString(set, false, false)}.
     */
    static String toString(Set set) {
        final boolean withNames = false;
        final boolean onePerLine = false;
        return toString(set, withNames, onePerLine);
    }
    /**
     * Formats the strings in {@code set} as a brace-delimited list.
     *
     * @param set   set of {@code String} members, visited in iteration order
     * @param name  if {@code true} each member is rendered with
     *              {@code getCodeAndName}, otherwise as space-separated hex
     * @param crtab if {@code true} members are separated by ";" + CR LF + TAB,
     *              otherwise by "; "
     * @return the formatted list, e.g. "{...; ...}"
     */
    static String toString(Set set, boolean name, boolean crtab) {
        final String separator = crtab ? ";\r\n\t" : "; ";
        final StringBuffer buffer = new StringBuffer("{");
        boolean needSeparator = false;
        for (Iterator members = set.iterator(); members.hasNext();) {
            final String member = (String) members.next();
            if (needSeparator) {
                buffer.append(separator);
            }
            needSeparator = true;
            buffer.append(name
                ? Default.ucd().getCodeAndName(member)
                : Utility.hex(member, " "));
        }
        return buffer.append("}").toString();
    }
    /**
     * Tells whether {@link #specialNormalization} would change {@code ch}:
     * true for U+00DF (es-zed) and for any code point that is not already
     * NFKD-normalized.
     */
    static boolean specialNormalizationDiffers(int ch) {
        // es-zed is special-cased; everything else follows NFKD
        return ch == 0x00DF || !Default.nfkd().isNormalized(ch);
    }
    /**
     * Returns "ss" when {@code s} is exactly U+00DF (es-zed); otherwise
     * returns the NFKD normalization of {@code s}.
     */
    static String specialNormalization(String s) {
        return s.equals("\u00DF") ? "ss" : Default.nfkd().normalize(s);
    }
    /**
     * Tells whether {@code ch} is one of the code points whose mapping is only
     * written, commented out, to the exceptions log by
     * {@code generateSpecialCasing}.
     *
     * @param ch code point to test
     * @return {@code true} for the hard-coded code points and ranges below, or
     *         when the decomposition type of {@code ch} is COMPAT_SQUARE
     */
    static boolean isExcluded(int ch) {
        final boolean hardCoded =
               ch == 0x0132 || ch == 0x0133          // skip IJ, ij
            || ch == 0x037A                          // skip GREEK YPOGEGRAMMENI
            || (0x249C <= ch && ch <= 0x24B5)        // skip PARENTHESIZED LATIN SMALL LETTER A..
            || (0x20A8 <= ch && ch <= 0x217B);       // skip Rupee..
        if (hardCoded) {
            return true;
        }
        // the UCD is consulted only when none of the hard-coded cases matched
        return Default.ucd().getDecompositionType(ch) == COMPAT_SQUARE;
    }
    /**
     * Generates the SpecialCasing data file and an accompanying exceptions log.
     *
     * <p>Every represented code point for which
     * {@link #specialNormalizationDiffers} is true is examined: the simple case
     * mappings of the character itself are compared with the simple case
     * mappings of its special normalization. Characters whose mappings survive
     * the filters below produce one line of the form
     * {@code code; lower; title; upper; # name}. Lines for characters rejected
     * by {@link #isExcluded} are written, commented out, to the
     * "SpecialCasingExceptions" log; all others are grouped by an ordering
     * class, written to the "SpecialCasing" file under "DerivedData/" with a
     * heading per group, and followed by the SpecialCasingFooter.txt file.
     *
     * <p>The exceptions log is closed in a {@code finally} block so that it is
     * not leaked if the scan fails part-way.
     *
     * @param normalize if {@code true} the file names get the suffix
     *                  "-Normalized" and the mappings are written as computed;
     *                  if {@code false} the mappings are written in NFD, except
     *                  for ordering classes 6 and 7
     * @throws IOException if one of the files cannot be opened or written
     */
    static void generateSpecialCasing(boolean normalize) throws IOException {
        Map sorted = new TreeMap();
        String suffix2 = normalize ? "-Normalized" : "";
        PrintWriter log = Utility.openPrintWriter("SpecialCasingExceptions"
            + suffix2 + UnicodeDataFile.getFileSuffix(true), Utility.LATIN1_UNIX);
        try {
            for (int ch = 0; ch <= 0x10FFFF; ++ch) {
                Utility.dot(ch);
                if (!Default.ucd().isRepresented(ch)) continue;
                if (!specialNormalizationDiffers(ch)) continue;

                // simple mappings of the character itself
                String lower = Default.nfc().normalize(Default.ucd().getCase(ch, SIMPLE, LOWER));
                String upper = Default.nfc().normalize(Default.ucd().getCase(ch, SIMPLE, UPPER));
                String title = Default.nfc().normalize(Default.ucd().getCase(ch, SIMPLE, TITLE));

                // simple mappings of its special normalization ("f" prefix)
                String chstr = UTF16.valueOf(ch);
                String decomp = specialNormalization(chstr);
                String flower = Default.nfc().normalize(Default.ucd().getCase(decomp, SIMPLE, LOWER));
                String fupper = Default.nfc().normalize(Default.ucd().getCase(decomp, SIMPLE, UPPER));
                String ftitle = Default.nfc().normalize(Default.ucd().getCase(decomp, SIMPLE, TITLE));

                // special normalization of the simple mappings ("b" prefix)
                String base = decomp;
                String blower = specialNormalization(lower);
                String bupper = specialNormalization(upper);
                String btitle = specialNormalization(title);

                // bring everything that is compared below into NFC
                flower = Default.nfc().normalize(flower);
                fupper = Default.nfc().normalize(fupper);
                ftitle = Default.nfc().normalize(ftitle);
                base = Default.nfc().normalize(base);
                blower = Default.nfc().normalize(blower);
                bupper = Default.nfc().normalize(bupper);
                btitle = Default.nfc().normalize(btitle);

                if (ch == CHECK_CHAR) {
                    System.out.println("Code: " + Default.ucd().getCodeAndName(ch));
                    System.out.println("Decomp: " + Default.ucd().getCodeAndName(decomp));
                    System.out.println("Base: " + Default.ucd().getCodeAndName(base));
                    System.out.println("SLower: " + Default.ucd().getCodeAndName(lower));
                    System.out.println("FLower: " + Default.ucd().getCodeAndName(flower));
                    System.out.println("BLower: " + Default.ucd().getCodeAndName(blower));
                    System.out.println("STitle: " + Default.ucd().getCodeAndName(title));
                    System.out.println("FTitle: " + Default.ucd().getCodeAndName(ftitle));
                    System.out.println("BTitle: " + Default.ucd().getCodeAndName(btitle));
                    System.out.println("SUpper: " + Default.ucd().getCodeAndName(upper));
                    System.out.println("FUpper: " + Default.ucd().getCodeAndName(fupper));
                    System.out.println("BUpper: " + Default.ucd().getCodeAndName(bupper));
                }

                // presumably if there is a single code point, it would already be in the simple mappings
                // NOTE(review): the third operand tests 'title', not 'ftitle' like its neighbours;
                // this looks like a typo but is kept as-is because changing it would alter the
                // generated data -- confirm against the published SpecialCasing.txt.
                if (UTF16.countCodePoint(flower) == 1 && UTF16.countCodePoint(fupper) == 1
                        && UTF16.countCodePoint(title) == 1) {
                    if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Default.ucd().getCodeAndName(ch));
                    continue;
                }

                // if there is no change from the base, skip
                if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) {
                    if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Default.ucd().getCodeAndName(ch));
                    continue;
                }

                // fix special cases: where the "f" mapping matches the "b" mapping,
                // fall back to the simple mapping
                if (flower.equals(blower)) flower = lower;
                if (fupper.equals(bupper)) fupper = upper;
                if (ftitle.equals(btitle)) ftitle = title;

                // if there are no changes from the original, or the expanded original, skip
                if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) {
                    if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Default.ucd().getCodeAndName(ch));
                    continue;
                }

                // ordering class: selects the output section and its heading
                String name = Default.ucd().getName(ch);
                int order = name.equals("LATIN SMALL LETTER SHARP S") ? 1
                    : ch == 0x130 ? 2
                    : name.indexOf("ARMENIAN SMALL LIGATURE") >= 0 ? 4
                    : name.indexOf("LIGATURE") >= 0 ? 3
                    : name.indexOf("GEGRAMMENI") < 0 ? 5
                    : UTF16.countCodePoint(ftitle) == 1 ? 6
                    : UTF16.countCodePoint(fupper) == 2 ? 7
                    : 8;
                if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Default.ucd().getCodeAndName(ch));

                // HACK: classes 6 and 7 are never written in NFD
                boolean denormalize = !normalize && order != 6 && order != 7;

                // a mapping equal to the base is written as the character itself
                String mapping = Utility.hex(ch)
                    + "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Default.nfd().normalize(flower) : flower)
                    + "; " + Utility.hex(ftitle.equals(base) ? chstr : denormalize ? Default.nfd().normalize(ftitle) : ftitle)
                    + "; " + Utility.hex(fupper.equals(base) ? chstr : denormalize ? Default.nfd().normalize(fupper) : fupper)
                    + "; # " + Default.ucd().getName(ch);

                // special exclusions
                if (isExcluded(ch)) {
                    log.println("# " + mapping);
                } else {
                    int x = ch;
                    if (ch == 0x01F0) x = 0x03B1; // HACK to reorder the same
                    // sort key: ordering class in the top byte, code point below it
                    sorted.put(new Integer((order << 24) | x), mapping);
                }
            }
        } finally {
            // release the log even if the scan above throws
            log.close();
        }

        System.out.println("Writing");
        // NOTE(review): udf is not closed on failure; what UnicodeDataFile.close() does beyond
        // closing the stream is not visible here, so it is only invoked on the success path.
        UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedData/", "SpecialCasing" + suffix2);
        PrintWriter out = udf.out;

        Iterator it = sorted.keySet().iterator();
        int lastOrder = -1;
        while (it.hasNext()) {
            Integer key = (Integer) it.next();
            String line = (String) sorted.get(key);
            int order = key.intValue() >> 24;
            if (order != lastOrder) {
                // first line of a new ordering class: emit the section heading
                lastOrder = order;
                out.println();
                boolean skipLine = false;
                switch (order) {
                case 1:
                    out.println("# The German es-zed is special--the normal mapping is to SS.");
                    out.println("# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))");
                    break;
                case 2:
                    out.println("# Preserve canonical equivalence for I with dot. Turkic is handled below.");
                    break;
                case 3: out.println("# Ligatures"); break;
                case 4: skipLine = true; break;
                case 5: out.println("# No corresponding uppercase precomposed character"); break;
                case 6: Utility.appendFile("com/ibm/text/UCD/SpecialCasingIota.txt", Utility.UTF8, out); break;
                case 7: out.println("# Some characters with YPOGEGRAMMENI also have no corresponding titlecases"); break;
                case 8: skipLine = true; break;
                }
                if (!skipLine) out.println();
            }
            out.println(line);
        }
        Utility.appendFile("com/ibm/text/UCD/SpecialCasingFooter.txt", Utility.UTF8, out);
        udf.close();
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java
@ -1,93 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseTest.java,v $
 * $Date: 2004/02/07 01:01:15 $
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 /**
  * Command-line generator for "CaseTest.txt".
  *
  * <p>For every allocated code point that has at least one case mapping that
  * differs from the others, a line with its lower/upper/title/fold mappings is
  * written. A second line, for the code point followed by {@link #testChar}, is
  * written when casing that sequence does not give the same result as
  * appending {@code testChar} to the individual mappings.
  */
 abstract public class GenerateCaseTest implements UCD_Types {
    /**
     * Writes CaseTest.txt.
     *
     * @param args ignored
     * @throws IOException if the output or the header file cannot be accessed
     */
    public static void main(String[] args) throws IOException {
        System.out.println("Remember to add length marks (half & full) and other punctuation for sentence, with FF61");
        PrintWriter out = Utility.openPrintWriter("CaseTest.txt", Utility.UTF8_WINDOWS);
        try {
            out.println("# CaseTest");
            out.println("# Generated: " + Default.getDate() + ", MED");
            Utility.appendFile("CaseTestHeader.txt", Utility.LATIN1, out);
            // inclusive upper bound so that the last code point is visited as well
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                Utility.dot(cp);
                if (!Default.ucd().isAllocated(cp)) continue;
                if (Default.ucd().isHangulSyllable(cp)) continue;
                byte cat = Default.ucd().getCategory(cp);
                // PRIVATE_USE is a general-category value, so it must be compared
                // with the category, not with the code point
                if (cat == PRIVATE_USE) continue;

                String lower = Default.ucd().getCase(cp, FULL, LOWER);
                String upper = Default.ucd().getCase(cp, FULL, UPPER);
                String title = Default.ucd().getCase(cp, FULL, TITLE);
                String fold = Default.ucd().getCase(cp, FULL, FOLD);
                // all four mappings identical: nothing to test for this code point
                if (lower.equals(upper)
                    && lower.equals(title)
                    && lower.equals(fold)) continue;

                String s = UTF16.valueOf(cp);
                write(out, s, true);

                // now the same character followed by the test combining mark
                s = s + testChar;
                String s2 = Default.nfd().normalize(s);
                String lower1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, LOWER));
                String upper1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, UPPER));
                String title1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, TITLE));
                String fold1 = Default.nfc().normalize(Default.ucd().getCase(s2, FULL, FOLD));
                // skip when casing the sequence equals casing the character and then appending the mark
                if (lower1.equals(Default.nfc().normalize(lower+testChar))
                    && upper1.equals(Default.nfc().normalize(upper+testChar))
                    && title1.equals(Default.nfc().normalize(title+testChar))
                    && fold1.equals(Default.nfc().normalize(fold+testChar))
                ) continue;
                write(out, s, true);
            }
            out.println("# total lines: " + counter);
        } finally {
            // release the file even if generation fails part-way
            out.close();
        }
    }

    /** Combining mark appended to each cased character for the second test line. */
    static final char testChar = '\u0316';

    /** Number of data lines written so far by {@link #write}. */
    static int counter = 0;

    /**
     * Writes one test line: the hex of {@code ss} followed by the hex of the
     * NFC-normalized FULL lower, upper, title and fold mappings of its NFD form,
     * and increments {@link #counter}.
     *
     * @param out       destination
     * @param ss        source string
     * @param doComment if {@code true} a tab, "# " and the name of {@code ss}
     *                  are appended to the line
     */
    static void write(PrintWriter out, String ss, boolean doComment) {
        String s = Default.nfd().normalize(ss);
        String lower = Default.nfc().normalize(Default.ucd().getCase(s, FULL, LOWER));
        String upper = Default.nfc().normalize(Default.ucd().getCase(s, FULL, UPPER));
        String title = Default.nfc().normalize(Default.ucd().getCase(s, FULL, TITLE));
        String fold = Default.nfc().normalize(Default.ucd().getCase(s, FULL, FOLD));
        out.println(Utility.hex(ss) + "; "
            + Utility.hex(lower) + "; "
            + Utility.hex(upper) + "; "
            + Utility.hex(title) + "; "
            + Utility.hex(fold)
            + (doComment ?  "\t# " + Default.ucd().getName(ss) : "")
        );
        counter++;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java
@ -1,777 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java,v $
 * $Date: 2004/04/17 18:21:39 $
 * $Revision: 1.5 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 public class GenerateLineBreakTest implements UCD_Types {
    // COMMON STUFF for Hangul
    // Hangul type codes returned by getHangulType: hNot means "not Hangul",
    // hL..hLVT are the five types, and hLIMIT is one past the last type.
    static final byte hNot = -1, hL = 0, hV = 1, hT = 2, hLV = 3, hLVT = 4, hLIMIT = 5;
    // Display names of the five Hangul types, indexed by the hL..hLVT codes.
    static final String[] hNames = {"L", "V", "T", "LV", "LVT"};
    /**
     * Classifies {@code cp} as one of the Hangul types.
     *
     * @param cp code point to classify
     * @return {@code hL}, {@code hV} or {@code hT} for leading, vowel or
     *         trailing jamo; {@code hLV} for a Hangul syllable that the UCD
     *         reports as "double", {@code hLVT} for any other Hangul syllable;
     *         {@code hNot} otherwise
     */
    static byte getHangulType(int cp) {
        byte kind = hNot;
        if (Default.ucd().isLeadingJamo(cp)) {
            kind = hL;
        } else if (Default.ucd().isVowelJamo(cp)) {
            kind = hV;
        } else if (Default.ucd().isTrailingJamo(cp)) {
            kind = hT;
        } else if (Default.ucd().isHangulSyllable(cp)) {
            kind = Default.ucd().isDoubleHangul(cp) ? hLV : hLVT;
        }
        return kind;
    }
    //============================
    // Identifier of the rule that decided the most recent isBreak call
    // (isBreak sets this as a side effect).
    protected String rule;
    // Prefix of the generated file names (see run).
    protected String fileName = "Line";
    // all the other items are supplied in UCD_TYPES
    // Extra type codes placed after the regular ones: LB_LIMIT plus a Hangul
    // type code, then LB_SUP (returned by getType for code points above U+FFFF),
    // and LB2_LIMIT, one past LB_SUP.
    static byte LB_L = LB_LIMIT + hL, LB_V = LB_LIMIT + hV, LB_T = LB_LIMIT + hT, 
        LB_LV = LB_LIMIT + hLV, LB_LVT = LB_LIMIT + hLVT, LB_SUP = LB_LIMIT + hLIMIT,
        LB2_LIMIT = (byte)(LB_SUP + 1);
    // One sample string per type code, indexed by type; filled in by findSamples.
    String[] samples = new String[100];
    // Order in which the types are visited when generating tables and test lines.
    byte[] TypeOrder = {
        LB_OP, LB_CL, LB_QU, LB_GL, LB_NS, LB_EX, LB_SY, LB_IS, LB_PR, LB_PO,
        LB_NU, LB_AL, LB_ID, LB_IN, LB_HY, LB_BA, LB_BB, LB_B2, LB_ZW, LB_CM,
        // missing from Pair Table
        LB_SP, LB_BK, LB_CR, LB_LF, 
        // resolved types below
        LB_CB, LB_AI, LB_SA, LB_SG, LB_XX,
        // 3 JAMO CLASSES, plus supplementary
        LB_L, LB_V, LB_T, LB_LV, LB_LVT, LB_SUP
    };
    /**
     * Runs the line-break generator and then the word-break generator.
     *
     * @param args ignored
     * @throws IOException if either generator fails to write its files
     */
    public static void main(String[] args) throws IOException {
        GenerateLineBreakTest lineBreak = new GenerateLineBreakTest();
        lineBreak.run();
        GenerateWordBreakTest wordBreak = new GenerateWordBreakTest();
        wordBreak.run();
    }
    // stuff that subclasses need to override
    /**
     * Generates all output files for this break type.
     *
     * <p>After collecting the samples, writes {@code fileName + "BreakTest.html"}
     * containing the "current" and the "recommended" pair tables, then two text
     * files, {@code fileName + "Test_SHORT.txt"} and {@code fileName + "Test.txt"}.
     * Both text files contain the same test strings, generated by
     * {@link #genTestItems} for every ordered pair of sample characters (type
     * SG is omitted); only the second file carries a comment, and only on the
     * first string of each pair.
     *
     * <p>Each writer is closed in a {@code finally} block so that a failure
     * during generation does not leak the open file.
     *
     * @throws IOException if a file cannot be opened
     */
    public void run() throws IOException {
        findSamples();

        PrintWriter out = Utility.openPrintWriter(fileName + "BreakTest.html", Utility.UTF8_WINDOWS);
        try {
            out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'><title>"
                + fileName + "</title></head>");
            out.println("<body bgcolor='#FFFFFF'><h3>Current (fixed only for consistency):</h3>");
            generateTable(out, false);
            out.println("<h3>Recommended:</h3>");
            generateTable(out, true);
            out.println("</body></html>");
        } finally {
            out.close();
        }

        String[] testCase = new String[50];
        // do main test: k == 0 is the short file, k == 1 the commented one
        for (int k = 0; k < 2; ++k) {
            out = Utility.openPrintWriter(fileName + (k == 0 ? "Test_SHORT.txt" : "Test.txt"), Utility.LATIN1_WINDOWS);
            try {
                int counter = 0;
                printTestFileHeader(out);
                for (int ii = 0; ii < getLimit(); ++ii) {
                    int i = TypeOrder[ii];
                    if (i == LB_SG) continue;
                    String before = samples[i];
                    for (int jj = 0; jj < getLimit(); ++jj) {
                        Utility.dot(counter);
                        int j = TypeOrder[jj];
                        if (j == LB_SG) continue;
                        String after = samples[j];
                        // do line straight
                        int len = genTestItems(before, after, testCase);
                        for (int q = 0; q < len; ++q) {
                            printLine(out, testCase[q], k != 0 && q == 0, false);
                            ++counter;
                        }
                    }
                }
                out.println("# Lines: " + counter);
            } finally {
                out.close();
            }
        }
    }

    /** Writes the explanatory comment header that starts each generated test file. */
    private void printTestFileHeader(PrintWriter out) {
        out.println("# Default " + fileName + " Break Test");
        out.println("# Generated: " + Default.getDate() + ", MED");
        out.println("#");
        out.println("# Format:");
        out.println("# <string> (# <comment>)? ");
        out.println("#  <string> contains hex Unicode code points, with ");
        out.println("#\t" + BREAK + " wherever there is a break opportunity, and ");
        out.println("#\t" + NOBREAK + " wherever there is not.");
        out.println("#  <comment> the format can change, but currently it shows:");
        out.println("#\t- the sample character name");
        out.println("#\t- (x) the line_break property* for the sample character");
        out.println("#\t- [x] the rule that determines whether there is a break or not");
        out.println("#");
        out.println("# Samples:");
        out.println("# The test currently takes all pairs of linebreak types*,");
        out.println("# picks a sample for each type, and generates three strings: ");
        out.println("#\t- the pair alone");
        out.println("#\t- the pair alone with an imbeded space");
        out.println("#\t- the pair alone with embedded combining marks");
        out.println("# The sample for each type is simply the first code point (above NULL)");
        out.println("# with that property.");
        out.println("# * Note:");
        out.println("#\t- SG is omitted");
        out.println("#\t- 3 different Jamo characters and a supplementary character are added");
        out.println("#\t  The syllable types for the Jamo (L, V, T) are displayed in comments");
        out.println("#\t  instead of the linebreak property");
        out.println("# These samples may be extended in the future.");
        out.println("#");
    }
    // stuff that subclasses need to override
    /**
     * Builds the test strings for one pair of samples: the pair alone, the
     * pair separated by a space, and the pair separated by the combining marks
     * U+0301 U+0308.
     *
     * @param before  first sample
     * @param after   second sample
     * @param results array that receives the strings, starting at index 0
     * @return the number of strings stored (3)
     */
    public int genTestItems(String before, String after, String[] results) {
        final String[] infixes = {"", " ", "\u0301\u0308"};
        for (int slot = 0; slot < infixes.length; ++slot) {
            results[slot] = before + infixes[slot] + after;
        }
        return infixes.length;
    }
    // stuff that subclasses need to override
    /**
     * Tells whether {@code type} is left out of the generated table:
     * true for AI, SA, SG and XX.
     */
    boolean skipType(byte type) {
        if (type == LB_AI) return true;
        if (type == LB_SA) return true;
        if (type == LB_SG) return true;
        return type == LB_XX;
    }
    // stuff that subclasses need to override
    /**
     * Returns the display name of the type of {@code cp}: "SUP" for the
     * supplementary pseudo-type, a Hangul type name for the types at or above
     * {@code LB_LIMIT}, and otherwise the line-break ID reported by the UCD.
     */
    public String getTypeID(int cp) {
        final byte type = getType(cp);
        // LB_SUP must be tested first: it is also >= LB_LIMIT but has no hNames entry
        if (type == LB_SUP) {
            return "SUP";
        }
        return type >= LB_LIMIT
            ? hNames[type - LB_LIMIT]
            : Default.ucd().getLineBreakID_fromIndex(type);
    }
    // stuff that subclasses need to override
    /**
     * Returns the type code of {@code cp}: {@code LB_SUP} for anything above
     * U+FFFF, {@code LB_LIMIT} plus the Hangul type for Hangul, and otherwise
     * the line-break value from the UCD.
     */
    public byte getType(int cp) {
        if (cp > 0xFFFF) {
            return LB_SUP;
        }
        final byte hangul = getHangulType(cp);
        if (hangul == hNot) {
            return Default.ucd().getLineBreak(cp);
        }
        return (byte)(hangul + LB_LIMIT);
    }
    /**
     * Returns the number of {@code TypeOrder} entries visited when the test
     * files are generated ({@code LB2_LIMIT}).
     */
    public int getLimit() {
        final int limit = LB2_LIMIT;
        return limit;
    }
    /**
     * Returns the number of {@code TypeOrder} entries visited when the HTML
     * table is generated ({@code LB_SUP}, which leaves out the last entry).
     */
    public int getTableLimit() {
        final int limit = LB_SUP; // skip last
        return limit;
    }
    /**
     * Writes an HTML table with one row and one column per non-skipped type,
     * showing for each ordered pair of samples the entry computed by
     * {@link #getTableEntry}.
     *
     * <p>Each cell carries the deciding rule as its {@code title} attribute.
     * A cell is coloured when its entry differs from the entry obtained with
     * the opposite value of {@code recommended}, or when the entry equals
     * {@code NOBREAK}.
     *
     * @param out         destination
     * @param recommended which rule variant the table shows
     */
    public void generateTable(PrintWriter out, boolean recommended) {
        final String width = "width='" + (100 / (getTableLimit() + 1)) + "%'";

        // header row
        out.print("<table border='1' cellspacing='0'><tr><th " + width + "></th>");
        for (int col = 0; col < getTableLimit(); ++col) {
            final byte colType = TypeOrder[col];
            if (skipType(colType)) continue;
            out.print("<th " + width + ">" + getTypeID(samples[colType]) + "</th>");
        }
        out.print("</tr>");

        // body rows
        final String[] shownRule = new String[1];
        final String[] otherRule = new String[1];
        for (int row = 0; row < getTableLimit(); ++row) {
            final byte rowType = TypeOrder[row];
            if (skipType(rowType)) continue;
            final String before = samples[rowType];
            final StringBuffer cells = new StringBuffer("<tr><th>");
            cells.append(getTypeID(before)).append("</th>");
            for (int col = 0; col < getTableLimit(); ++col) {
                final byte colType = TypeOrder[col];
                if (skipType(colType)) continue;
                final String after = samples[colType];
                // the shown variant is evaluated first, then the opposite one
                final String entry = getTableEntry(before, after, recommended, shownRule);
                final String otherEntry = getTableEntry(before, after, !recommended, otherRule);
                final boolean noBreak = entry.equals(NOBREAK);
                final String background;
                if (!entry.equals(otherEntry)) {
                    background = noBreak ? " bgcolor='#CCFFFF'" : " bgcolor='#FFFF00'";
                } else {
                    background = noBreak ? " bgcolor='#CCCCFF'" : "";
                }
                cells.append("<th title='").append(shownRule[0]).append("'")
                     .append(background).append(">").append(entry).append("</th>");
            }
            out.println(cells.append("</tr>").toString());
        }
        out.println("</table>");
    }
    /**
     * Computes the table symbol for the pair {@code before}, {@code after}.
     *
     * <p>Three positions are probed with {@link #isBreak}: after the space in
     * "before SP after", before that space, and between the two in
     * "before after". The result is "_" when the direct pair breaks, "%" when
     * it does not break directly but one of the spaced positions does, and "^"
     * when none of the three positions breaks.
     *
     * @param before      first sample
     * @param after       second sample
     * @param recommended rule variant passed to {@code isBreak}
     * @param ruleOut     receives, in element 0, the rule (or "/"-joined rules)
     *                    behind the result; the {@code rule} field is set to
     *                    the same value
     * @return "_", "%" or "^"
     */
    public String getTableEntry(String before, String after, boolean recommended, String[] ruleOut) {
        // each isBreak call overwrites 'rule', so it is captured right after the call
        final boolean breakAfterSpace = isBreak(before + " " + after, before.length() + 1, recommended);
        final String afterSpaceRule = rule;
        final boolean breakBeforeSpace = isBreak(before + " " + after, before.length(), recommended);
        final String beforeSpaceRule = rule;
        final boolean directBreak = isBreak(before + after, before.length(), recommended);
        final String directRule = rule;

        final String symbol;
        if (directBreak) {
            symbol = "_";
        } else if (breakAfterSpace || breakBeforeSpace) {
            symbol = "%";
            rule = directRule;
        } else {
            symbol = "^";
            String joined = afterSpaceRule.equals(directRule)
                ? directRule
                : afterSpaceRule + "/" + directRule;
            if (!beforeSpaceRule.equals(directRule) && !beforeSpaceRule.equals(afterSpaceRule)) {
                joined += "/" + beforeSpaceRule;
            }
            rule = joined;
        }
        ruleOut[0] = rule;
        return symbol;
    }
    // Marker written where there is a break opportunity (U+00F7).
    static final String BREAK = "\u00F7";
    // Marker written where there is no break opportunity (U+00D7).
    static final String NOBREAK = "\u00D7";
    /**
     * Writes one test line for {@code source}: a break/no-break marker before
     * the first code point and after every code point, with the code points in
     * hex in between.
     *
     * @param out         destination
     * @param source      test string
     * @param comments    if {@code true} a comment is appended that repeats the
     *                    markers, each followed by the deciding rule in
     *                    brackets, around the name and type ID of every code point
     * @param recommended rule variant passed to {@code isBreak}
     */
    public void printLine(PrintWriter out, String source, boolean comments, boolean recommended) {
        final StringBuffer data = new StringBuffer();
        final StringBuffer notes = new StringBuffer("\t# ");

        // marker at offset 0; 'rule' is read only after isBreak has set it
        String marker = isBreak(source, 0, recommended) ? BREAK : NOBREAK;
        data.append(marker);
        notes.append(' ').append(marker).append(" [").append(rule).append(']');

        int offset = 0;
        while (offset < source.length()) {
            final int cp = UTF16.charAt(source, offset);
            final int next = offset + UTF16.getCharCount(cp);
            if (data.length() > 0) {
                data.append(' ');
                notes.append(' ');
            }
            data.append(Utility.hex(cp));
            notes.append(Default.ucd().getName(cp) + " (" + getTypeID(cp) + ")");
            marker = isBreak(source, next, recommended) ? BREAK : NOBREAK;
            data.append(' ').append(marker);
            notes.append(' ').append(marker).append(" [").append(rule).append(']');
            offset = next;
        }
        if (comments) data.append(notes);
        out.println(data);
    }
    /**
     * Fills {@code samples} with the first allocated, non-surrogate code point
     * (starting at U+0001) of every type, then prints the type ID and the
     * code-and-name of the first {@code TypeOrder.length} samples to standard output.
     */
    public void findSamples() {
        for (int cp = 1; cp <= 0x10FFFF; ++cp) {
            if (!Default.ucd().isAllocated(cp) || (0xD800 <= cp && cp <= 0xDFFF)) {
                continue;
            }
            if (cp == 0x1100) {
                // debugging marker kept from the original implementation
                System.out.print("here");
            }
            final int type = getType(cp);
            if (samples[type] == null) {
                samples[type] = UTF16.valueOf(cp);
            }
        }
        int index = 0;
        while (index < TypeOrder.length) {
            final String sample = samples[index];
            System.out.println(getTypeID(sample) + ":\t" + Default.ucd().getCodeAndName(sample));
            ++index;
        }
    }
    /**
     * Returns the type IDs of all code points in {@code s}, separated by single
     * spaces.
     *
     * @param s string to describe; may be {@code null}
     * @return "&lt;null&gt;" for a {@code null} string, otherwise the type IDs
     */
    public String getTypeID(String s) {
        if (s == null) return "<null>";
        if (s.length() == 1) return getTypeID(s.charAt(0));
        final StringBuffer ids = new StringBuffer();
        int pos = 0;
        while (pos < s.length()) {
            final int cp = UTF32.char32At(s, pos);
            if (pos > 0) ids.append(" ");
            ids.append(getTypeID(cp));
            pos += UTF32.count16(cp);
        }
        return ids.toString();
    }
    /**
     * Scans backwards from just before {@code offset} for the nearest code
     * point whose resolved type is not {@code notLBType}.
     *
     * @param source      text to scan
     * @param offset      scanning starts at {@code offset - 1}
     * @param notLBType   resolved type to skip over
     * @param recommended rule variant passed to {@code getResolvedType}
     * @return the index at which that code point was read, or -1 if there is none
     */
    public int findLastNon(String source, int offset, byte notLBType, boolean recommended) {
        int index = offset - 1;
        while (index >= 0) {
            final int cp = UTF16.charAt(source, index);
            if (getResolvedType(cp, recommended) != notLBType) {
                return index;
            }
            index -= UTF16.getCharCount(cp);
        }
        return -1;
    }
    /**
     * Returns the type of {@code cp} after resolution (LB 1): SA and XX become
     * AL, every other type is kept. With {@code recommended} set, any Hangul
     * code point (per {@link #getHangulType}) resolves to ID.
     *
     * @param cp          code point
     * @param recommended whether the "recommended" variant applies
     * @return the resolved type code
     */
    public byte getResolvedType (int cp, boolean recommended) {
        byte resolved = getType(cp);
        // LB 1: only SA and XX are remapped; AI, CB and SG pass through unchanged
        if (resolved == LB_SA || resolved == LB_XX) {
            resolved = LB_AL;
        }
        if (recommended && getHangulType(cp) != hNot) {
            resolved = LB_ID;
        }
        return resolved;
    }
    /**
     * Tells whether {@code offset} is a valid position in {@code s} that does
     * not fall between a lead surrogate and the trail surrogate following it.
     *
     * @param s      text
     * @param offset UTF-16 index to test
     * @return {@code false} if {@code offset} is outside {@code 0..s.length()}
     *         or splits a surrogate pair, {@code true} otherwise
     */
    public boolean onCodepointBoundary(String s, int offset) {
        if (offset < 0 || offset > s.length()) {
            return false;
        }
        if (offset == 0 || offset == s.length()) {
            return true;
        }
        final boolean splitsPair = UTF16.isLeadSurrogate(s.charAt(offset-1))
            && UTF16.isTrailSurrogate(s.charAt(offset));
        return !splitsPair;
    }
    // find out whether there is a break at offset
    // WARNING: as a side effect, sets "rule"
    public boolean isBreak(String source, int offset, boolean recommended) {
        // LB 1  Assign a line break category to each character of the input.
        // Resolve AI, CB, SA, SG, XX into other line break classes depending on criteria outside this algorithm.
        // this is taken care of in the getResolvedType function
        // LB 2a  Never break at the start of text
        rule="2a";
        if (offset <= 0) return false;
        // LB 2b  Always break at the end of text
        rule="2b";
        if (offset >= source.length()) return true;
        // UTF-16: never break in the middle of a code point
        if (!onCodepointBoundary(source, offset)) return false;
        // now get the character before and after, and their types
        int cpBefore = UTF16.charAt(source, offset-1);
        int cpAfter = UTF16.charAt(source, offset);
        byte before = getResolvedType(cpBefore, recommended);
        byte after = getResolvedType(cpAfter, recommended);
        rule="3a";
        // Always break after hard line breaks (but never between CR and LF).
        // CR ^ LF
        if (before == LB_CR && after == LB_LF) return false;
        if (before == LB_BK || before == LB_LF || before == LB_CR) return true;
        //LB 3b  Don’t break before hard line breaks.
        rule="3b";
        if (after == LB_BK || after == LB_LF | after == LB_CR) return false;
        // LB 4  Don’t break before spaces or zero-width space.
        // × SP
        // × ZW
        rule="4";
        if (after == LB_SP || after == LB_ZW) return false;
        // LB 5 Break after zero-width space.
        // ZW ÷
        rule="5";
        if (before == LB_ZW) return true;
        // LB 6  Don’t break graphemes (before combining marks, around virama or on sequences of conjoining Jamos.
        rule="6";
        if (after == LB_CM) return false;
        if (before == LB_L && (after == LB_L || after == LB_V || after == LB_LV || after == LB_LVT)) return false;
        if ((before == LB_LV || before == LB_V) && (after == LB_V || after == LB_T)) return false;
        if ((before == LB_LVT || before == LB_T) && (after == LB_T)) return false;
        boolean setBase = false;
        if (before == LB_CM) {
            setBase = true;
            int backOffset = findLastNon(source, offset, LB_CM, recommended);
            if (backOffset < 0) {
                before = LB_ID;
            } else {
                before = getResolvedType(UTF16.charAt(source, backOffset), recommended);
            }
        }
        // LB 7  In all of the following rules, if a space is the base character for a combining mark,
        // the space is changed to type ID. In other words, break before SP CM* in the same cases as
        // one would break before an ID.
        rule="7";
        if (setBase && before == LB_SP) before = LB_ID;
        // LB 8  Don’t break before ‘]’ or ‘!’ or ‘;’ or ‘/’,  even after spaces.
        // × CL, × EX, × IS, × SY
        rule="8";
        if (after == LB_CL || after == LB_EX || after == LB_SY | after == LB_IS) return false;
        // find the last non-space character; we will need it
        byte lastNonSpace = before;
        if (lastNonSpace == LB_SP) {
            int backOffset = findLastNon(source, offset, LB_CM, recommended);
            if (backOffset >= 0) {
                lastNonSpace = getResolvedType(UTF16.charAt(source, backOffset), recommended);
            }
        }
        // LB 9  Don’t break after ‘[’, even after spaces.
        // OP SP* ×
        rule="9";
        if (lastNonSpace == LB_OP) return false;
        // LB 10  Don’t break within ‘”[’, , even with intervening spaces.
        // QU SP* × OP
        rule="10";
        if (lastNonSpace == LB_QU && after == LB_OP) return false;
        // LB 11  Don’t break within ‘]h’, even with intervening spaces.
        // CL SP* × NS
        rule="11";
        if (lastNonSpace == LB_CL && after == LB_NS) return false;
        // LB 11a  Don’t break within ‘——’, even with intervening spaces.
        // B2 × B2
        rule="11a";
        if (lastNonSpace == LB_B2 && after == LB_B2) return false;
        if (recommended) {
            // LB 13  Don’t break before or after NBSP or WORD JOINER
            // × GL
            // GL ×
            rule="11b";
            if (after == LB_GL || before == LB_GL) return false;
        }
        // [Note: by this time, all of the "X" in the table are accounted for. We can safely break after spaces.]
        rule="12";
        // LB 12  Break after spaces
        // SP ÷
        if (before == LB_SP) return true;
        if (!recommended) {
            // LB 13  Don’t break before or after NBSP or WORD JOINER
            // × GL
            // GL ×
            rule="13";
            if (after == LB_GL || before == LB_GL) return false;
        }
        rule="14";
        // LB 14  Don’t break before or after ‘”’
        // × QU
        // QU ×
        if (before == LB_QU || after == LB_QU) return false;
        // LB 15  Don’t break before hyphen-minus, other hyphens, fixed-width spaces,
        // small kana and other non- starters,  or after acute accents:
        // × BA
        // × HY
        // × NS
        // BB ×
        if (recommended) {
        // LB 14a  Break before and after CB
        // CB ÷
        // ÷ CB
            if (before == LB_CB || after == LB_CB) return true;       
        }
        rule="15";
        if (after == LB_NS) return false;
        if (after == LB_HY) return false;
        if (after == LB_BA) return false;
        if (before == LB_BB) return false;
        if (!recommended) {
            // LB 15b  Break after hyphen-minus, and before acute accents:
            // HY ÷
            // ÷ BB
            rule="15b";
            if (before == LB_HY) return true;
            if (after == LB_BB) return true;
        }
        // LB 16  Don’t break between two ellipses, or between letters or numbers and ellipsis:
        // AL × IN
        // ID × IN
        // IN × IN
        // NU × IN
        // Examples: ’9...’, ‘a...’, ‘H...’
        rule="16";
        if ((before == LB_NU || before == LB_AL || before == LB_ID) && after == LB_IN) return false;
        if (before == LB_IN && after == LB_IN) return false;
        // Don't break alphanumerics.
        // LB 17  Don’t break within ‘a9’, ‘3a’, or ‘H%’
        // ID × PO
        // AL × NU
        // NU × AL
        // Numbers are of the form PR ? ( OP | HY ) ? NU (NU | IS) * CL ?  PO ?
        // Examples:   $(12.35)    2,1234    (12)¢    12.54¢
        // This is approximated with the following rules. (Some cases already handled above,
        // like ‘9,’, ‘[9’.)
        rule="17";
        if (before == LB_ID && after == LB_PO) return false;
        if (before == LB_AL && after == LB_NU) return false;
        if (before == LB_NU && after == LB_AL) return false;
        // LB 18  Don’t break between the following pairs of classes.
        // CL × PO
        // HY × NU
        // IS × NU
        // NU × NU
        // NU × PO
        // PR × AL
        // PR × HY
        // PR × ID
        // PR × NU
        // PR × OP
        // SY × NU
        // Example pairs: ‘$9’, ‘$[’, ‘$-‘, ‘-9’, ‘/9’, ‘99’, ‘,9’,  ‘9%’ ‘]%’
        rule="18";
        if (before == LB_CL && after == LB_PO) return false;
        if (before == LB_HY && after == LB_NU) return false;
        if (before == LB_IS && after == LB_NU) return false;
        if (before == LB_NU && after == LB_NU) return false;
        if (before == LB_NU && after == LB_PO) return false;
        if (before == LB_PR && after == LB_AL) return false;
        if (before == LB_PR && after == LB_HY) return false;
        if (before == LB_PR && after == LB_ID) return false;
        if (before == LB_PR && after == LB_NU) return false;
        if (before == LB_PR && after == LB_OP) return false;
        if (before == LB_SY && after == LB_NU) return false;
        if (recommended) {
            // LB 15b  Break after hyphen-minus, and before acute accents:
            // HY ÷
            // ÷ BB
            rule="18b";
            if (before == LB_HY) return true;
            if (after == LB_BB) return true;
        }
        // LB 19  Don’t break between alphabetics (“at”)
        // AL × AL
        rule="19";
        if (before == LB_AL && after == LB_AL) return false;
        // LB 20  Break everywhere else
        // ALL ÷
        // ÷ ALL
        rule="20";
        return true;
    }
    /**
     * Break-test generator that classifies code points as CR, LF, Control, Extend, Link,
     * CGJ, Base, LetterBase or Other, followed by the five Hangul syllable types
     * (L, V, T, LV, LVT), and decides in {@link #isBreak} whether a boundary falls
     * between two adjacent code points. Its instance initializer sets {@code fileName}
     * to "Word".
     *
     * The Hangul constants (hL, hV, ...), hNames, hNot, getHangulType, findLastNon,
     * onCodepointBoundary, BREAK/NOBREAK and the {@code rule} field are defined outside
     * this class (inherited or in the enclosing scope).
     */
    static class GenerateWordBreakTest extends GenerateLineBreakTest {
        // Local type codes come first; the Hangul types are appended by offsetting the
        // shared Hangul codes with oLIMIT, so oLIMIT must equal the number of local codes.
        static final byte CR = 0, LF = 1, Control = 2, Extend = 3, Link = 4, CGJ = 5, Base = 6, LetterBase = 7, Other = 8,
            oLIMIT = 9, // RESET THIS IF LIST ABOVE CHANGES!
            L = oLIMIT + hL, V = oLIMIT + hV, T = oLIMIT + hT, LV = oLIMIT + hLV, LVT = oLIMIT + hLVT,
            LIMIT = LVT + 1;
        // Display names for the local type codes, indexed by code (0 .. oLIMIT-1).
        static final String[] Names = {"CR", "LF", "CTL", "Extend", "Link", "CGJ", "Base", "LetterBase", "Other" };
        // Binary properties consulted by getType() after the category-based checks.
        static UCDProperty extendProp = UnifiedBinaryProperty.make(DERIVED | GraphemeExtend);
        static UCDProperty baseProp = UnifiedBinaryProperty.make(DERIVED | GraphemeBase);
        static UCDProperty linkProp = UnifiedBinaryProperty.make(BINARY_PROPERTIES | GraphemeLink);
        {
            fileName = "Word";
            // Identity ordering: every type code is listed in its numeric order.
            TypeOrder = new byte[LIMIT];
            for (byte i = 0; i < TypeOrder.length; ++i) {
                TypeOrder[i] = i;
            }
        }
        /** No type is skipped by this generator. */
        boolean skipType(byte type) {
            return false;
        }
        /** @return the number of type codes (local plus Hangul). */
        public int getLimit() {
            return LIMIT;
        }
        /** @return the table size, which here is the same as {@link #getLimit()}. */
        public int getTableLimit() {
            return LIMIT;
        }
        // stuff that subclasses need to override
        /**
         * Produces the test strings for a before/after pair: here a single item,
         * the plain concatenation.
         *
         * @param results receives the generated string at index 0
         * @return the number of items written (always 1)
         */
        public int genTestItems(String before, String after, String[] results) {
            results[0] = before + after;
            return 1;
        }
        /**
         * Evaluates the boundary between {@code before} and {@code after}.
         *
         * @param ruleOut receives at index 0 the value of {@code rule} left by isBreak
         * @return BREAK or NOBREAK
         */
        public String getTableEntry(String before, String after, boolean recommended, String[] ruleOut) {
            boolean normalBreak = isBreak(before + after, before.length(), recommended);
            ruleOut[0] = rule;
            return normalBreak ? BREAK : NOBREAK;
        }
        // stuff that subclasses need to override
        /** @return the display name of the type of {@code cp}; Hangul types use hNames. */
        public String getTypeID(int cp) {
            byte type = getType(cp);
            if (type >= oLIMIT) return hNames[type - oLIMIT];
            return Names[type];
        }
        // stuff that subclasses need to override
        /**
         * Classifies a code point. Checks run in priority order: fixed single characters,
         * Hangul syllable type, general category, then the binary properties.
         */
        public byte getType(int cp) {
            // single characters
            if (cp == 0xA) return LF;
            if (cp == 0xD) return CR;
            if (cp == 0x034F) return CGJ;
            if (cp == 0x2028 || cp == 0x2029) return Control;
            // Hangul
            byte result = getHangulType(cp);
            if (result != hNot) return (byte)(result + oLIMIT);
            // other properties
            // category based
            byte cat = Default.ucd().getCategory(cp);
            if (cat == Cc) return Control;
            if (cat == Cf) return Extend;
            if (((1<<cat) & LETTER_MASK) != 0) return LetterBase;
            // other binary properties
            if (linkProp.hasValue(cp)) return Link;
            if (extendProp.hasValue(cp)) return Extend;
            if (baseProp.hasValue(cp)) return Base;
            return Other;
        }
        /** The resolved type is the raw type; {@code recommended} is not consulted. */
        public byte getResolvedType(int cp, boolean recommended) {
            return getType(cp);
        }
        /**
         * Decides whether there is a boundary at {@code offset} in {@code source}.
         * Before each test the {@code rule} field is set to the number of the rule
         * being tried, so on return it names the rule that decided the outcome.
         *
         * @param offset UTF-16 index of the candidate boundary; out-of-range offsets
         *        yield false
         * @param recommended selects the shorter rule set for Link/CGJ (see below)
         * @return true if a break is allowed at {@code offset}
         */
        public boolean isBreak(String source, int offset, boolean recommended) {
            rule="1";
            if (offset < 0 || offset > source.length()) return false;
            if (offset == 0) return true;
            rule = "2";
            if (offset == source.length()) return true;
            // UTF-16: never break in the middle of a code point
            if (!onCodepointBoundary(source, offset)) return false;
            // now get the character before and after, and their types
            int cpBefore = UTF16.charAt(source, offset-1);
            int cpAfter = UTF16.charAt(source, offset);
            byte before = getResolvedType(cpBefore, recommended);
            byte after = getResolvedType(cpAfter, recommended);
            rule = "3";
            if (before == CR && after == LF) return false;
            rule = "4";
            if (before == CR || before == LF || before == Control 
                || after == Control || after == LF || after == CR) return true;
            // Rules 6-8: keep conjoining Hangul jamo sequences together.
            rule = "6";
            if (before == L && (after == L || after == V || after == LV || after == LVT)) return false;
            rule = "7";
            if ((before == LV || before == V) && (after == V || after == T)) return false;
            rule = "8";
            if ((before == LVT || before == T) && (after == T)) return false;
            rule = "9";
            if (after == Extend) return false;
            if (recommended) {
                // NOTE(review): this branch does not assign a new rule number, so a
                // no-break decided here is reported as rule "9" -- confirm intended.
                if (after == Link || after == CGJ) return false;
            } else {
                // Do not break around a CGJ.
                rule = "10";
                if (before == CGJ && (after == Base 
                    || after == LetterBase || after == L || after == V || after == T || after == LV || after == LVT)) return false;
                rule = "11";
                if (after == CGJ) return false;
                // Do not break between linking characters and letters, or before linking characters. This provides for Indic graphemes, where virama (halant) will link character clusters together.
                rule = "12";
                //Link Extend* × LetterBase  (12) 
                if (after == LetterBase || after == L || after == V || after == T || after == LV || after == LVT) {
                    // Look back past any Extend characters for a preceding Link.
                    int backOffset = findLastNon(source, offset, Extend, recommended);
                    if (backOffset >= 0) {
                        byte last = getResolvedType(UTF16.charAt(source, backOffset), recommended);
                        if (last == Link) return false;
                    }
                }
                rule = "13";
                if (after == Link) return false;
            }
            // Otherwise break after all characters.
            rule = "14";
            return true;
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateNamedSequences.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateNamedSequences.java
@ -1,125 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateNamedSequences.java,v $
 * $Date: 2006/04/05 22:12:45 $
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import com.ibm.text.utility.*;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.text.Transliterator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import java.util.*;
 import java.io.*;
 /**
  * Builds the HTML chart of named sequences: reads the "NamedSequences" data file,
  * turns each entry into a table row, and writes the result to
  * DerivedData/NamedSequences&lt;html-suffix&gt; by filling in
  * com/ibm/text/UCD/NamedSequences-Template.html.
  */
 public final class GenerateNamedSequences implements UCD_Types {
    static final boolean DEBUG = false;

    /**
     * Builds an &lt;img&gt; tag that points at the unicode.org varglyph service for a
     * two-code-point sequence.
     *
     * @param code0 hex of the first code point, as it should appear in the URL
     * @param code1 hex of the second code point
     * @param shape shaping-context name, or "" for none; only its first four
     *        characters go into the URL (the whole name if it is shorter)
     * @param description appearance description; if it contains "feminine" the
     *        suffix "fem" is appended to the shape abbreviation
     * @return the HTML image element
     */
    static public String showVarGlyphs(String code0, String code1, String shape, String description) {
        if (DEBUG) System.out.println(code0 + ", " + code1 + ", [" + shape + "]");
        String abbShape = "";
        if (shape.length() != 0) {
            // Clamp the cut so a shape name shorter than four characters cannot throw.
            abbShape = '-' + shape.substring(0, Math.min(4, shape.length()));
            if (description.indexOf("feminine") >= 0) abbShape += "fem";
        }
        return "<img alt='U+" + code0 + "+U+" + code1 + "/" + shape 
            + "' src='http://www.unicode.org/cgi-bin/varglyph?24-" +code0 + "-" + code1 + abbShape + "'>";
    }
 /*
 #   Data line layout as consumed by generate() (fields separated by ';'):
 #   Field 0: the name of the sequence
 #   Field 1: the code points of the sequence, in hex, separated by spaces
 */
    /**
     * Reads the named-sequence data and writes the HTML chart.
     *
     * @throws IOException if the data file cannot be read or the output cannot be written
     */
    static public void generate() throws IOException {
        // read the data and compose the table
        StringBuffer table = new StringBuffer(
            "<table><tr><th width='10%'>Rep Glyph</th><th>Hex Sequence</th><th>Name</th><th>Copyable</th></tr>");
        String[] splits = new String[4];
        String[] codes = new String[20];
        BufferedReader in = Utility.openUnicodeFile("NamedSequences", Default.ucdVersion(), true, Utility.LATIN1);
        try {
            Transliterator unicodexml = Transliterator.getInstance("hex/xml");
            while (true) {
                String line = Utility.readDataLine(in);
                if (line == null) break;
                line = line.trim();
                if (line.length() == 0) continue;
                Utility.split(line, ';', splits);
                String name = splits[0];
                int codeCount = Utility.split(splits[1], ' ', codes);
                // Assemble the actual character string from the hex code points.
                StringBuffer codeBuffer = new StringBuffer();
                for (int i = 0; i < codeCount; ++i) {
                    UTF16.append(codeBuffer, Integer.parseInt(codes[i],16));
                }
                String codeWithHyphens = splits[1].replaceAll("\\s", "-");
                String codeAlt = "U+" + splits[1].replaceAll("\\s", " U+");
                String codeString = unicodexml.transliterate(codeBuffer.toString());
                // <img alt="03E2" src="http://www.unicode.org/cgi-bin/refglyph?24-03E2" style="vertical-align:middle">
                //table += "<tr><td><img alt='U+" + codes[0] + "' src='http://www.unicode.org/cgi-bin/refglyph?24-" + codes[0] + "'></td>\n";
                String imageName = "images/U" + codeWithHyphens + ".gif";
                // Sequences whose hex text sorts in ["1780", "1800") take their image from TR28 instead.
                if (splits[1].compareTo("1780") >= 0 && splits[1].compareTo("1800") < 0) {
                    String codeNoSpaces2 = splits[1].replaceAll("\\s", "");
                    imageName = "http://www.unicode.org/reports/tr28/images/" + codeNoSpaces2 + ".gif";
                }
                // Four cells per row, matching the four header columns.
                table.append("<tr>")
                    .append("<td class='copy'><img alt='(").append(codeAlt).append(")' src='")
                    .append(imageName).append("'><br><tt>").append(splits[1]).append("</tt></td>")
                    .append("<td>").append(splits[1]).append("</td>")
                    .append("<td>").append(name).append("</td>")
                    .append("<td class='copy'>").append(codeString).append("</td>")
                    .append("</tr>\n");
                System.out.println(splits[1] + "\t" + codeString);
            }
        } finally {
            // Release the data file even if a line fails to parse.
            in.close();
        }
        table.append("</table>");
        // now write out the results
        String directory = "DerivedData/";
        String filename = directory + "NamedSequences" + UnicodeDataFile.getHTMLFileSuffix(true);
        PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX);
        /*
        String[] batName = {""};
        String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
        String version = Default.ucd().getVersion();
        int lastDot = version.lastIndexOf('.');
        String updateDirectory = version.substring(0,lastDot) + "-Update";
        int updateV = version.charAt(version.length()-1) - '0';
        if (updateV != 0) updateDirectory += (char)('1' + updateV);
        if (DEBUG) System.out.println("updateDirectory: " + updateDirectory);
        */
        try {
            // Placeholder/value pairs substituted into the template.
            String[] replacementList = {
                "@revision@", Default.ucd().getVersion(),
                //"@updateDirectory@", updateDirectory,
                "@date@", Default.getDate(),
                "@table@", table.toString()};
            Utility.appendFile("com/ibm/text/UCD/NamedSequences-Template.html", Utility.UTF8, out, replacementList);
        } finally {
            out.close();
        }
        //Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java
@ -1,136 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
 * $Date: 2006/04/05 22:12:44 $
 * $Revision: 1.7 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import java.util.*;
 import java.io.*;
 /**
  * Builds the HTML chart of standardized variation sequences: reads the
  * "StandardizedVariants" data file, turns each entry into a table row, and writes
  * the result to DerivedData/StandardizedVariants&lt;html-suffix&gt; by filling in
  * com/ibm/text/UCD/StandardizedVariants-Template.html.
  */
 public final class GenerateStandardizedVariants implements UCD_Types {
    static final boolean DEBUG = false;

    /**
     * Builds an &lt;img&gt; tag that points at the unicode.org varglyph service for a
     * variation sequence.
     *
     * @param code0 hex of the base code point, as it should appear in the URL
     * @param code1 hex of the variation selector
     * @param shape shaping-context name, or "" for none; only its first four
     *        characters go into the URL (the whole name if it is shorter)
     * @param description appearance description; if it contains "feminine" the
     *        suffix "fem" is appended to the shape abbreviation
     * @return the HTML image element
     */
    static public String showVarGlyphs(String code0, String code1, String shape, String description) {
        if (DEBUG) System.out.println(code0 + ", " + code1 + ", [" + shape + "]");
        String abbShape = "";
        if (shape.length() != 0) {
            // Clamp the cut so a shape name shorter than four characters cannot throw.
            abbShape = '-' + shape.substring(0, Math.min(4, shape.length()));
            if (description.indexOf("feminine") >= 0) abbShape += "fem";
        }
        return "<img alt='U+" + code0 + "+U+" + code1 + "/" + shape 
            + "' src='http://www.unicode.org/cgi-bin/varglyph?24-" +code0 + "-" + code1 + abbShape + "'>";
    }

    /**
     * Compares two dotted version strings field by field. Fields that are both
     * decimal integers are compared numerically, so "10.0.0" sorts after "4.1.0";
     * any other field pair falls back to plain string comparison. Missing trailing
     * fields count as "0".
     *
     * @return a negative, zero or positive value as {@code a} is older than, the
     *         same as, or newer than {@code b}
     */
    private static int compareVersions(String a, String b) {
        String[] aFields = a.split("\\.");
        String[] bFields = b.split("\\.");
        int fieldCount = Math.max(aFields.length, bFields.length);
        for (int i = 0; i < fieldCount; ++i) {
            String x = i < aFields.length ? aFields[i] : "0";
            String y = i < bFields.length ? bFields[i] : "0";
            int diff;
            try {
                int xi = Integer.parseInt(x);
                int yi = Integer.parseInt(y);
                diff = xi < yi ? -1 : (xi > yi ? 1 : 0);
            } catch (NumberFormatException notNumeric) {
                // Deliberate fallback: a non-numeric field is ordered as text.
                diff = x.compareTo(y);
            }
            if (diff != 0) return diff;
        }
        return 0;
    }
 /*
 #   Field 0: the variation sequence
 #   Field 1: the description of the desired appearance
 #   Field 2: where the appearance is only different in particular shaping environments
 #	this field lists them. The possible values are: isolated, initial, medial, final.
 #	If more than one is present, there are spaces between them.
 */
    /**
     * Reads the variation-sequence data and writes the HTML chart.
     *
     * @throws IOException if the data file cannot be read or the output cannot be written
     */
    static public void generate() throws IOException {
        // read the data and compose the table
        StringBuffer table = new StringBuffer(
            "<table><tr><th>Rep Glyph</th><th>Character Sequence</th><th>Context</th><th width='10%'>Alt Glyph</th><th>Description of variant appearance</th></tr>");
        String[] splits = new String[4];
        String[] codes = new String[2];
        String[] shapes = new String[4];
        BufferedReader in = Utility.openUnicodeFile("StandardizedVariants", Default.ucdVersion(), true, Utility.LATIN1);
        try {
            while (true) {
                String line = Utility.readDataLine(in);
                if (line == null) break;
                if (line.length() == 0) continue;
                Utility.split(line, ';', splits);
                Utility.split(splits[0], ' ', codes);
                int code = Utility.codePointFromHex(codes[0]);
                // <img alt="03E2" src="http://www.unicode.org/cgi-bin/refglyph?24-03E2" style="vertical-align:middle">
                table.append("<tr><td><img alt='U+" + codes[0] + "' src='http://www.unicode.org/cgi-bin/refglyph?24-" + codes[0] + "'></td>\n");
                table.append("<td>" + splits[0] + "</td>\n");
                String shape = splits[2].trim();
                // "all" means no specific shaping context, so no per-shape glyphs are listed.
                if (shape.equals("all")) shape = "";
                table.append("<td>" + Utility.replace(shape, " ", "<br>") + "</td>\n");
                // http://www.unicode.org/cgi-bin/varglyph?24-1820-180B-fina
                // http://www.unicode.org/cgi-bin/varglyph?24-222A-FE00
                table.append("<td>");
                if (shape.length() == 0) {
                    table.append(showVarGlyphs(codes[0], codes[1], "", ""));
                } else {
                    // One alternate glyph per listed shaping context.
                    int shapeCount = Utility.split(shape, ' ', shapes);
                    for (int i = 0; i < shapeCount; ++i) {
                        if (i != 0) table.append(" ");
                        table.append(showVarGlyphs(codes[0], codes[1], shapes[i], splits[1]));
                    }
                }
                table.append("</td>\n");
                table.append("<td>" + Default.ucd().getName(code) + " " + splits[1] + "</td>\n");
                table.append("</tr>");
            }
        } finally {
            // Release the data file even if a line fails to parse.
            in.close();
        }
        table.append("</table>");
        // now write out the results
        String directory = "DerivedData/";
        String filename = directory + "StandardizedVariants" + UnicodeDataFile.getHTMLFileSuffix(true);
        PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX);
        try {
            //String[] batName = {""};
            //String mostRecent = UnicodeDataFile.generateBat(directory, filename, UnicodeDataFile.getFileSuffix(true), batName);
            String version = Default.ucd().getVersion();
            int lastDot = version.lastIndexOf('.');
            String updateDirectory;
            String partialFilename;
            // Numeric comparison: a plain string compare would rank "10.0.0" below "4.1.0".
            int versionOrder = compareVersions(version, "4.1.0");
            if (versionOrder < 0) {
                updateDirectory = version.substring(0,lastDot) + "-Update";
                int updateV = version.charAt(version.length()-1) - '0';
                // NOTE(review): '1' + updateV turns update 1 into the suffix "2";
                // '0' + updateV may have been intended -- confirm against the
                // published directory names before changing.
                if (updateV != 0) updateDirectory += (char)('1' + updateV);
                if (DEBUG) System.out.println("updateDirectory: " + updateDirectory);
                partialFilename = "StandardizedVariants-" + Default.ucd().getVersion();
            } else if (versionOrder == 0) {
                updateDirectory = version.substring(0,lastDot) + "/ucd";
                partialFilename = "StandardizedVariants";
            } else {
                updateDirectory = version + "/ucd";
                partialFilename = "StandardizedVariants";
            }
            // Placeholder/value pairs substituted into the template.
            String[] replacementList = {
                "@revision@", Default.ucd().getVersion(),
                "@updateDirectory@", updateDirectory,
                "@filename@", partialFilename,
                "@date@", Default.getDate(),
                "@table@", table.toString()};
            Utility.appendFile("com/ibm/text/UCD/StandardizedVariants-Template.html", Utility.UTF8, out, replacementList);
        } finally {
            out.close();
        }
        //Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
@ -1,516 +0,0 @@
 /*
 * Created on May 3, 2005
 * Copyright (C) 2004-2005, Unicode, Inc., International Business Machines Corporation, and others.
 * For terms of use, see http://www.unicode.org/terms_of_use.html
 */
 package com.ibm.text.UCD;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.TransliteratorUtilities;
 import com.ibm.icu.dev.test.util.UnicodeLabel;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.UnicodeMap.Composer;
 import com.ibm.icu.impl.CollectionUtilities;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.IDNA;
 import com.ibm.icu.text.StringPrepParseException;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.icu.text.UTF16.StringComparator;
 import com.ibm.icu.util.ULocale;
 import com.ibm.text.UCD.GenerateHanTransliterator.MultiComparator;
 import com.ibm.text.UCD.TestData.RegexMatcher;
 import com.ibm.text.utility.Utility;
 class GenerateStringPrep implements UCD_Types {
 	/**
 	 * Command-line entry point: runs the generator once and reports completion.
 	 *
 	 * @param args ignored
 	 * @throws IOException propagated from genStringPrep()
 	 */
 	public static void main (String[] args) throws IOException {
 		//checkChars(false);
 		GenerateStringPrep generator = new GenerateStringPrep();
 		generator.genStringPrep();
 		System.out.println("Done");
 	}
 	// Per-script character sets, indexed by the script code from ucd.getScript(cp);
 	// slots are created on demand in genStringPrep().
 	// NOTE(review): the capacity of 100 is hard-coded; a script code >= 100 would
 	// overflow the array -- confirm it still covers every script code.
 	UnicodeSet[] coreChars = new UnicodeSet[100];
 	// Code points that are not NFD-normalized (filled in genStringPrep()).
 	UnicodeSet decomposable = new UnicodeSet();
 	// Values of this map label the "FOR REVIEW" sections written by genStringPrep().
 	UnicodeMap suspect = new UnicodeMap();
 	// Property sources: one created with an empty version string (presumably the
 	// default UCD version -- confirm) and one pinned to Unicode 3.2.0.
 	ToolUnicodePropertySource ups = ToolUnicodePropertySource.make("");
 	ToolUnicodePropertySource ups32 = ToolUnicodePropertySource.make("3.2.0");
 	//UnicodeSet id_continue = ups.getSet("ID_Continue=true");
 	UnicodeSet xid_continue = ups.getSet("XID_Continue=true");
 	// Extra characters treated as word characters; populated by the initializer below.
 	UnicodeSet wordChars = new UnicodeSet();
 	{
 		// Disabled experiment: modifier letters with general category Sk.
 		if (false) {
 			wordChars.addAll(ups.getSet("name=.*MODIFIER LETTER.*", new RegexMatcher()));
 			wordChars.retainAll(ups.getSet("gc=Sk"));
 		}
 		// Fixed list of additional word characters.
 		wordChars.addAll(new UnicodeSet("[\\u0027 \\u002D \\u002E \\u003A \\u00B7 \\u058A \\u05F3" +
 		" \\u05F4 \\u200C \\u200D \\u2010 \\u2019 \\u2027 \\u30A0 \\u04C0" +
 		" \\u055A \\u02B9 \\u02BA]"));
 		//wordChars.removeAll(xid_continue);
 	}
 	// NOTE(review): removeAll()/complement() below are applied directly to the set
 	// returned by ups.getSet(); if getSet() hands out a shared instance this mutates
 	// it in place -- confirm that a fresh set is returned.
 	// Pattern_Syntax characters that are not in wordChars.
 	UnicodeSet patternProp = ups.getSet("Pattern_Syntax=true").removeAll(wordChars);
 	// Everything except the characters with NFKC_Quickcheck=NO.
 	UnicodeSet isNFKC = ups.getSet("NFKC_Quickcheck=NO").complement();
 	// Me and Mn characters, minus the default-ignorable code points.
 	UnicodeSet non_spacing = new UnicodeSet(ups.getSet("gc=Me"))
 		.addAll(ups.getSet("gc=Mn"))
 		.removeAll(ups.getSet("Default_Ignorable_Code_Point=true"));
 	// Complement of XID_Continue, minus wordChars (built on a copy of xid_continue).
 	UnicodeSet not_xid_continue = new UnicodeSet(xid_continue).complement().removeAll(wordChars);
 	//UnicodeSet[] decompChars = new UnicodeSet[100];
 	// UCD instance used for the case-mapping and script lookups in genStringPrep().
 	UCD ucd = Default.ucd();
 	// English collator shared by all instances; it is the first comparator inside uca.
 	static Collator uca0 = Collator.getInstance(ULocale.ENGLISH);
 	static {
 		// Configure the shared collator once, at class initialization. It was set from
 		// an instance initializer before, so static users of uca saw the default
 		// strength until the first GenerateStringPrep object was constructed.
 		uca0.setStrength(Collator.IDENTICAL);
 	}
 	// Compares with uca0 first, then with a UTF16.StringComparator.
 	static GenerateHanTransliterator.MultiComparator uca
 		= new GenerateHanTransliterator.MultiComparator(new Comparator[] {
 				uca0, new UTF16.StringComparator()});
 	// Characters whose Bidi_Class is AL or R.
 	UnicodeSet bidiR = new UnicodeSet(
 			"[[:Bidi_Class=AL:][:Bidi_Class=R:]]");
 	// Characters whose Bidi_Class is L.
 	UnicodeSet bidiL = new UnicodeSet("[:Bidi_Class=l:]");
 	// Code points equal to their own full uppercase mapping (filled in genStringPrep()).
 	UnicodeSet hasNoUpper = new UnicodeSet();
 	// hasNoUpper minus wordChars; reassigned in genStringPrep().
 	UnicodeSet hasNoUpperMinus = new UnicodeSet();
 	// Formatter used to print sets; genStringPrep() configures and later replaces it.
 	BagFormatter bf = new BagFormatter();
 	// Starts empty; not written anywhere in the visible part of this class.
 	UnicodeSet inIDN = new UnicodeSet();
 	// Code points equal to their own full case folding (filled in genStringPrep()).
 	UnicodeSet isCaseFolded = new UnicodeSet();
 	/**
 	 * Scans every code point, sorts the assigned ones into the per-script, IDNA-type
 	 * and case-related sets of this object, and then writes four files into GEN_DIR:
 	 * idn-chars.html, script-chars.html, idn-chars.txt and idn_vs_cfnfkcid.txt.
 	 * The per-script output itself is produced by showCodes(), and IDNA
 	 * classification by getIDNAType(); both are defined elsewhere in this class.
 	 *
 	 * @throws IOException if an output file or the HTML header cannot be processed
 	 */
 	void genStringPrep() throws IOException {
 		//showScriptToBlock();
 		bf.setShowLiteral(TransliteratorUtilities.toHTMLControl);
 		bf.setUnicodePropertyFactory(ups);
 		//bf.setValueSource(UnicodeLabel.NULL);
 		// Disabled debugging dump of the derived sets.
 		if (false) {
 			System.out.println("word chars: " + bf.showSetNames(wordChars));
 			System.out.println("pat: " + bf.showSetNames(patternProp));
 			System.out.println("xid: " + bf.showSetNames(not_xid_continue));
 		}
 		// Pass 1: classify every code point except unassigned, private-use and surrogates.
 		for (int cp = 0; cp <= 0x10FFFF; ++cp) {
 			Utility.dot(cp);
 			int cat = Default.ucd().getCategory(cp);
 			if (cat == UCD.Cn || cat == UCD.Co || cat == UCD.Cs) continue;
 			if (!Default.nfd().isNormalized(cp)) decomposable.add(cp);
 			// get IDNA
 			int idnaType = getIDNAType(cp);
 			idnaTypeSet[idnaType].add(cp);
 			String str = UTF16.valueOf(cp);
 			// A character that maps to itself is recorded as having no uppercase / being case-folded.
 			if (str.equals(ucd.getCase(str, FULL, UPPER))) hasNoUpper.add(cp);
 			if (str.equals(ucd.getCase(str, FULL, FOLD))) isCaseFolded.add(cp);
 			// scripts
 			int script = ucd.getScript(cp);
 			// Create the per-script set lazily on first use.
 			if (coreChars[script] == null)
 				coreChars[script] = new UnicodeSet();
 			coreChars[script].add(cp);
 		}
 		// fix characters with no uppercase
 		hasNoUpperMinus = new UnicodeSet(hasNoUpper).removeAll(wordChars);
 		System.out.println(bf.showSetNames(hasNoUpper));
 		Utility.fixDot();
 		// NOTE(review): none of the writers below is closed if an exception is thrown
 		// part-way through -- consider try/finally if this tool is revived.
 		PrintWriter htmlOut = BagFormatter.openUTF8Writer(GEN_DIR, "idn-chars.html");
 		PrintWriter htmlOut2 = BagFormatter.openUTF8Writer(GEN_DIR, "script-chars.html");
 		PrintWriter textOut = BagFormatter.openUTF8Writer(GEN_DIR, "idn-chars.txt");
 		// The text file starts with U+FEFF (byte order mark) followed by a line break.
 		textOut.println('\uFEFF');
 		textOut.println("For documentation, see idn-chars.html");
 		// Copy the HTML header, substituting the current date for %date%.
 		Utility.appendFile("./com/ibm/text/UCD/idn-charsHeader.html", Utility.UTF8_WINDOWS, htmlOut, 
 				new String[] {"%date%", Default.getDate()});
 		/*
 		out
 				.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
 		out.println("<title>IDN Characters</title><style>");
 		out.println("<!--");
 		out.println(".script       { font-size: 150%; background-color: #CCCCCC }");
 		out.println(".Atomic       { background-color: #CCCCFF }");
 		out.println(".Atomic-no-uppercase       { background-color: #CCFFCC }");
 		out.println(".Non-XID       { background-color: #FFCCCC }");
 		out.println(".Decomposable       { background-color: #FFFFCC }");
 		out.println(".Pattern_Syntax       { background-color: #FFCCFF }");
 		out.println("th           { text-align: left }");
 		out.println("-->");
 		out.println("</style></head><body><table>");
 		*/
 		htmlOut.println("<table border='1' cellpadding='2' cellspacing='0'>");
 		htmlOut2.println("<html><body><table border='1' cellpadding='2' cellspacing='0'>");
 		// Pass 2: emit every specific script first, then Common and Inherited last.
 		for (int scriptCode = 0; scriptCode < coreChars.length; ++scriptCode) {
 			if (scriptCode == COMMON_SCRIPT
 					|| scriptCode == INHERITED_SCRIPT)
 				continue;
 			showCodes(htmlOut, textOut, scriptCode, htmlOut2);
 		}
 		showCodes(htmlOut, textOut, COMMON_SCRIPT, htmlOut2);
 		showCodes(htmlOut, textOut, INHERITED_SCRIPT, htmlOut2);
 		showCodes(htmlOut, textOut, non_spacing);
 		htmlOut.println("</table></body></html>");
 		htmlOut.close();
 		htmlOut2.println("</table></body></html>");
 		htmlOut2.close();
 		// Remaining sections of idn-chars.txt are listed without merging ranges.
 		bf.setMergeRanges(false);
 		textOut.println();
 		textOut.println("# *** ADDITIONAL WORD CHARACTERS ***");
 		textOut.println();
 		bf.setValueSource("word-chars");
 		bf.showSetNames(textOut, wordChars);
 		textOut.println();
 		textOut.println("# *** FOR REVIEW ***");
 		bf.setLabelSource(UnicodeLabel.NULL);
 		// One section per distinct value in suspect, in sorted order; values are cast to String.
 		for (Iterator it = new TreeSet(suspect.getAvailableValues()).iterator(); it.hasNext();) {
 			textOut.println();
 			String value = (String)it.next();
 			bf.setValueSource(value);
 			bf.showSetNames(textOut, suspect.getSet(value));
 		}
 		textOut.close();
 		// Second text file: reuse the variable and start over with a fresh formatter.
 		textOut = BagFormatter.openUTF8Writer(GEN_DIR, "idn_vs_cfnfkcid.txt");
 		bf = new BagFormatter();
 		bf.setUnicodePropertyFactory(ups);
 		textOut.println();
 		textOut.println("# *** Comparison of IDN with CF_NFKC_ID (case-folded, NFKC, XID), U3.2 only ***");
 		// U32 = everything that is not gc=Cn according to the Unicode 3.2.0 property source.
 		UnicodeSet U32 = ups32.getSet("gc=cn").complement();
 		// XID_Continue restricted to isNFKC, case-folded characters and U32.
 		UnicodeSet CF_NFKC_ID = new UnicodeSet(xid_continue).retainAll(isNFKC).retainAll(isCaseFolded).retainAll(U32);		
 		bf.showSetDifferences(textOut, "CF_NFKC_ID", CF_NFKC_ID, "IDN", idnaTypeSet[OK]);
 		textOut.close();
 	}
 	/**
 	 * Debugging aid: composes the script and block property maps, joining the two
 	 * values for each code point with a tab, prints every distinct combined value in
 	 * sorted order, and then always stops by throwing IllegalArgumentException.
 	 */
 	private void showScriptToBlock() {
 		// Joins the values of the two maps with a tab.
 		UnicodeMap.Composer joinWithTab = new UnicodeMap.Composer() {
 			public Object compose(int codePoint, Object first, Object second) {
 				return first + "\t" + second;
 			}
 		};
 		UnicodeMap scriptMap = ToolUnicodePropertySource.make("").getProperty("script").getUnicodeMap();
 		UnicodeMap blockMap = ToolUnicodePropertySource.make("").getProperty("block").getUnicodeMap();
 		// Compose on a thawed clone so the script map itself is left untouched.
 		UnicodeMap thawedScripts = (UnicodeMap)scriptMap.cloneAsThawed();
 		UnicodeMap combined = thawedScripts.composeWith(blockMap, joinWithTab);
 		Iterator pairs = combined.getAvailableValues(new TreeSet()).iterator();
 		while (pairs.hasNext()) {
 			System.out.println(pairs.next());
 		}
 		// Deliberate hard stop after the dump.
 		throw new IllegalArgumentException();
 	}
 	// Map view of script_to_gif, built by CollectionUtilities.asMap. This instance field
 	// may refer to the static table declared below it because static fields are
 	// initialized before any instance is created.
 	Map scriptToGif = CollectionUtilities.asMap(script_to_gif);
 	// Pairs of {script name, image file name}; the trailing comment on each row is
 	// the name of the block the image is taken from.
 	static String[][] script_to_gif = {
 		{"Common","common.gif"}, //Miscellaneous_Symbols
 		{"Inherited","combiningdiacritics.gif"}, //Combining_Diacritical_Marks
 		{"Arabic","arabic.gif"}, //Arabic
 		{"Armenian","armenian.gif"}, //Armenian
 		{"Bengali","bengali.gif"}, //Bengali
 		{"Bopomofo","bopomofo.gif"}, //Bopomofo
 		{"Braille","braillesymbols.gif"}, //Braille_Patterns
 		{"Buginese","buginese.gif"}, //Buginese
 		{"Buhid","buhid.gif"}, //Buhid
 		{"Canadian_Aboriginal","canadiansyllabics.gif"}, //Unified_Canadian_Aboriginal_Syllabics
 		{"Cherokee","cherokee.gif"}, //Cherokee
 		{"Coptic","coptic.gif"}, //Coptic
 		{"Cypriot","cypriot.gif"}, //Cypriot_Syllabary
 		{"Cyrillic","cyrillic.gif"}, //Cyrillic
 		{"Deseret","deseret.gif"}, //Deseret
 		{"Devanagari","devanagari.gif"}, //Devanagari
 		{"Ethiopic","ethiopic.gif"}, //Ethiopic
 		{"Georgian","georgian.gif"}, //Georgian
 		{"Glagolitic","glagolitic.gif"}, //Glagolitic
 		{"Gothic","gothic.gif"}, //Gothic
 		{"Greek","greek.gif"}, //Greek_and_Coptic
 		{"Gujarati","gujarati.gif"}, //Gujarati
 		{"Gurmukhi","gurmukhi.gif"}, //Gurmukhi
 		// NOTE(review): "Han" is listed twice; in a map only one of the two images can
 		// survive (presumably the later row) -- confirm which one is wanted.
 		{"Han","cjkideographcompat.gif"}, //CJK_Compatibility_Ideographs
 		{"Han","kangxiradicals.gif"}, //Kangxi_Radicals
 		{"Hangul","hangulsyllables.gif"}, //Hangul_Syllables
 		{"Hanunoo","hanunoo.gif"}, //Hanunoo
 		{"Hebrew","hebrew.gif"}, //Hebrew
 		{"Hiragana","hiragana.gif"}, //Hiragana
 		{"Kannada","kannada.gif"}, //Kannada
 		{"Katakana","katakana.gif"}, //Katakana
 		{"Kharoshthi","kharoshthi.gif"}, //Kharoshthi
 		{"Khmer","khmer.gif"}, //Khmer
 		{"Lao","lao.gif"}, //Lao
 		{"Latin","latin.gif"}, //Basic_Latin
 		{"Limbu","limbu.gif"}, //Limbu
 		{"Linear_B","linearbsyllabary.gif"}, //Linear_B_Syllabary
 		{"Malayalam","malayalam.gif"}, //Malayalam
 		{"Mongolian","mongolian.gif"}, //Mongolian
 		{"Myanmar","myanmar.gif"}, //Myanmar
 		{"New_Tai_Lue","newtailu.gif"}, //New_Tai_Lue
 		{"Ogham","ogham.gif"}, //Ogham
 		{"Old_Italic","olditalic.gif"}, //Old_Italic
 		{"Old_Persian","oldpersiancuneiform.gif"}, //Old_Persian
 		{"Oriya","oriya.gif"}, //Oriya
 		{"Osmanya","osmanya.gif"}, //Osmanya
 		{"Runic","runic.gif"}, //Runic
 		{"Shavian","shavian.gif"}, //Shavian
 		{"Sinhala","sinhala.gif"}, //Sinhala
 		{"Syloti_Nagri","silotinagri.gif"}, //Syloti_Nagri
 		{"Syriac","syriac.gif"}, //Syriac
 		{"Tagalog","tagalog.gif"}, //Tagalog
 		{"Tagbanwa","tagbanwa.gif"}, //Tagbanwa
 		{"Tai_Le","taile.gif"}, //Tai_Le
 		{"Tamil","tamil.gif"}, //Tamil
 		{"Telugu","telugu.gif"}, //Telugu
 		{"Thaana","thaana.gif"}, //Thaana
 		{"Thai","thai.gif"}, //Thai
 		{"Tibetan","tibetan.gif"}, //Tibetan
 		{"Tifinagh","tifinagh.gif"}, //Tifinagh
 		{"Ugaritic","ugaritic.gif"}, //Ugaritic
 		{"Yi","yi.gif"}, //Yi_Syllables
 	};
 	// One set per IDNA type code (OK, DELETED, ILLEGAL, REMAPPED); every slot
 	// starts out as an empty set.
 	UnicodeSet[] idnaTypeSet = new UnicodeSet[IDNA_TYPE_LIMIT];
 	{
 		int slot = 0;
 		while (slot < idnaTypeSet.length) {
 			idnaTypeSet[slot] = new UnicodeSet();
 			++slot;
 		}
 	}
 	// Type codes used as indexes into idnaTypeSet; IDNA_TYPE_LIMIT is the number of codes.
 	static final int OK = 0;
 	static final int DELETED = 1;
 	static final int ILLEGAL = 2;
 	static final int REMAPPED = 3;
 	static final int IDNA_TYPE_LIMIT = 4;
 	/**
 	 * Classifies one code point by converting it to ASCII with IDNA and back.
 	 * The static buffers declared after this method are used as scratch space
 	 * and are overwritten on every call.
 	 *
 	 * @param cp the code point to classify
 	 * @return DELETED when the ASCII form is empty, ILLEGAL when either
 	 *         conversion throws, REMAPPED when TestData.equals reports that the
 	 *         round trip differs from the input, otherwise OK
 	 */
 	static public int getIDNAType(int cp) {
 		inbuffer.setLength(0);
 		UTF16.append(inbuffer, cp);
 		try {
 			intermediate = IDNA.convertToASCII(inbuffer, IDNA.DEFAULT); // USE_STD3_RULES
 			boolean nothingLeft = intermediate.length() == 0;
 			if (nothingLeft) {
 				return DELETED;
 			}
 			outbuffer = IDNA.convertToUnicode(intermediate, IDNA.USE_STD3_RULES);
 		} catch (StringPrepParseException e) {
 			// The expected way for IDNA to reject a code point.
 			return ILLEGAL;
 		} catch (Exception e) {
 			// Anything else is unexpected: report it, but still classify as illegal.
 			System.out.println("Failure at: " + Utility.hex(cp));
 			return ILLEGAL;
 		}
 		return TestData.equals(inbuffer, outbuffer) ? OK : REMAPPED;
 	}
 	// Scratch buffers shared by every getIDNAType call.
 	static StringBuffer inbuffer = new StringBuffer();
 	static StringBuffer intermediate, outbuffer;
 	// Set built from the UnicodeSet pattern "[:Lowercase:]".
 	UnicodeSet lowercase = new UnicodeSet("[:Lowercase:]");
 	/**
 	 * Writes the section for one script: a header row to both HTML writers and
 	 * the text writer, then one listing per non-empty category of the script's
 	 * core characters. Returns immediately when coreChars has no entry for the
 	 * script.
 	 * <p>
 	 * The categories are carved out of a copy of the core set one after another
 	 * with extract(), which removes what it returns from its second argument,
 	 * so the order of those calls decides which category a character lands in.
 	 *
 	 * @param htmlOut    receives the header row and every category listing
 	 * @param textOut    receives the plain-text header and listings
 	 * @param scriptCode index of the script in coreChars
 	 * @param htmlOut2   receives only the header row
 	 */
 	private void showCodes(PrintWriter htmlOut, PrintWriter textOut, int scriptCode, PrintWriter htmlOut2) {
 		if (coreChars[scriptCode] == null) {
 			return;
 		}
 		String rawScriptId = Default.ucd().getScriptID_fromIndex((byte) scriptCode);
 		String script = Utility.getUnskeleton(rawScriptId.toLowerCase(),true);
 		System.out.println(script);
 		htmlOut.println();
 		String gifName = ((String)scriptToGif.get(script)).toLowerCase();
 		String scriptLine = "<tr><th class='script'><img src='images/" + gifName
 		+ "'> Script: " + script + "</th></tr>";
 		htmlOut.println(scriptLine);
 		htmlOut2.println(scriptLine);
 		textOut.println();
 		textOut.println("#*** Script: " + script + " ***");
 		// Each extract() call shrinks its second argument; do not reorder.
 		UnicodeSet core = new UnicodeSet(coreChars[scriptCode]);
 		UnicodeSet deleted = extract(idnaTypeSet[DELETED], core);
 		UnicodeSet illegal = extract(idnaTypeSet[ILLEGAL], core);
 		UnicodeSet remapped = extract(idnaTypeSet[REMAPPED], core);
 		UnicodeSet remappedIsNFKC = extract(isNFKC, remapped);
 		UnicodeSet remappedIsNFKCDecomp = extract(decomposable, remappedIsNFKC);
 		UnicodeSet decomp = extract(decomposable, core);
 		UnicodeSet pattern = extract(patternProp, core);
 		UnicodeSet non_id = extract(not_xid_continue, core);
 		UnicodeSet bicameralNoupper = hasNoUpper.containsAll(core)
 				? new UnicodeSet()
 				: extract(hasNoUpperMinus, core);
 		// Record a category for every no-uppercase or non-XID character in suspect.
 		UnicodeSet toFlag = new UnicodeSet(bicameralNoupper).addAll(non_id);
 		UnicodeSetIterator flagged = new UnicodeSetIterator(toFlag);
 		while (flagged.next()) {
 			String cat = Default.ucd().getCategoryID(flagged.codepoint);
 			String name = Default.ucd().getName(flagged.codepoint);
 			boolean nameOverride = name.indexOf("MUSICAL SYMBOL") >= 0
 					|| name.indexOf("DINGBA") >= 0
 					|| name.indexOf("RADICAL ") >= 0;
 			suspect.put(flagged.codepoint, nameOverride ? "XX" : cat);
 		}
 		// Listings are emitted in this fixed order; empty categories are skipped.
 		String[] titles = {
 			"Atomic",
 			"Atomic-no-uppercase",
 			"Pattern_Syntax",
 			"Non-XID",
 			"NFD-Decomposable",
 			"IDN-Remapped-Case-Atomic",
 			"IDN-Remapped-Case-NFD-Decomposable",
 			"IDN-Remapped-Compat",
 			"IDN-Deleted",
 			"IDN-Prohibited",
 		};
 		UnicodeSet[] categories = {
 			core,
 			bicameralNoupper,
 			pattern,
 			non_id,
 			decomp,
 			remappedIsNFKC,
 			remappedIsNFKCDecomp,
 			remapped,
 			deleted,
 			illegal,
 		};
 		for (int i = 0; i < categories.length; ++i) {
 			if (categories[i].size() != 0) {
 				printlnSet(htmlOut, textOut, script, titles[i], categories[i], scriptCode, uca);
 			}
 		}
 	}
 	/**
 	 * Writes the header row for the inherited script to htmlOut, then one
 	 * "Visible_Combining_Marks_" listing per position type returned by
 	 * getPositions(), each restricted to the non_spacing set. Types whose
 	 * restricted set is empty are skipped.
 	 *
 	 * @param htmlOut receives the header row and the listings
 	 * @param textOut receives the plain-text listings
 	 * @param uset    not used by this method
 	 * @throws IOException if getPositions() fails to read its data file
 	 */
 	private void showCodes(PrintWriter htmlOut, PrintWriter textOut, UnicodeSet uset) throws IOException {
 		String rawScriptId = Default.ucd().getScriptID_fromIndex((byte) INHERITED_SCRIPT);
 		String script = Utility.getUnskeleton(rawScriptId.toLowerCase(),true);
 		String gifName = ((String)scriptToGif.get(script)).toLowerCase();
 		String scriptLine = "<tr><th class='script'><img src='images/" + gifName + "'> Script: " + script + "</th></tr>";
 		htmlOut.println(scriptLine);
 		UnicodeMap positions = getPositions();
 		Iterator types = positions.getAvailableValues(new TreeSet(uca)).iterator();
 		while (types.hasNext()) {
 			String type = (String) types.next();
 			UnicodeSet current = positions.getSet(type).retainAll(non_spacing);
 			if (current.size() != 0) {
 				printlnSet(htmlOut, textOut, script, "Visible_Combining_Marks_" + type, current, INHERITED_SCRIPT, positionComparator);
 			}
 		}
 	}
 	/**
 	 * Reads positions.txt and maps each listed code point to a position type.
 	 * <p>
 	 * A line starting with '@' sets the type for the lines that follow (the type
 	 * is "Undetermined" until the first such line). Every other non-empty line is
 	 * split on ';'; its first field is parsed as hex and the first code point of
 	 * the result is stored under the current type.
 	 *
 	 * @return the code point to type map
 	 * @throws IOException if the file cannot be opened, read or closed
 	 */
 	private UnicodeMap getPositions() throws IOException {
 		UnicodeMap result = new UnicodeMap();
 		BufferedReader in = bf.openUTF8Reader("C:\\DATA\\confusables\\", "positions.txt");
 		try {
 			String type="Undetermined";
 			while (true) {
 				String line = Utility.readDataLine(in);
 				if (line == null) break;
 				if (line.length() == 0) continue;
 				if (line.startsWith("@")) {
 					type = line.substring(1);
 					continue;
 				}
 				String[] pieces = Utility.split(line, ';');
 				String code = Utility.fromHex(pieces[0]);
 				result.put(UTF16.charAt(code,0), type);
 			}
 		} finally {
 			// Release the file handle even when a line fails to parse.
 			in.close();
 		}
 		return result;
 	}
 	// Orders strings by the name Default.ucd().getName() reports for each of them.
 	static Comparator positionComparator = new Comparator() {
 		public int compare(Object o1, Object o2) {
 			String left = (String)o1, right = (String)o2;
 			String leftName = Default.ucd().getName(left);
 			String rightName = Default.ucd().getName(right);
 			return leftName.compareTo(rightName);
 		}
 	};
 	/**
 	 * Moves the characters that core shares with other out of core.
 	 *
 	 * @param other the set to intersect with; not modified
 	 * @param core  the set to carve from; the returned characters are removed from it
 	 * @return a new set holding the characters that were in both sets
 	 */
 	private UnicodeSet extract(UnicodeSet other, UnicodeSet core) {
 		UnicodeSet shared = new UnicodeSet(core);
 		shared.retainAll(other);
 		core.removeAll(shared);
 		return shared;
 	}
 	/**
 	 * Writes one titled listing of a set: a header row and a cell of formatted
 	 * characters to htmlOut, and a "# title" heading plus the set's names (via
 	 * bf.showSetNames) to textOut. Does nothing when the set is null.
 	 * <p>
 	 * For HAN_SCRIPT and HANGUL_SCRIPT the HTML cell lists ranges; for every
 	 * other script it lists individual strings ordered by the comparator.
 	 *
 	 * @param htmlOut    HTML destination
 	 * @param textOut    plain-text destination
 	 * @param script     script name, used in the value source passed to bf
 	 * @param title      category title, also used as CSS class and link target
 	 * @param unicodeset the characters to list; may be null
 	 * @param scriptCode script index, selects the range or per-character layout
 	 * @param comparator ordering for the per-character layout
 	 * @throws IllegalArgumentException if the comparator treats two members of the set as equal
 	 */
 	private  void printlnSet(PrintWriter htmlOut, PrintWriter textOut,
 			String script, String title, UnicodeSet unicodeset, int scriptCode, Comparator comparator) {
 		if (unicodeset == null) {
 			return;
 		}
 		int size = unicodeset.size();
 		// Mark the cell right-to-left only when the set has R characters and no L characters.
 		boolean rtlOnly = unicodeset.containsSome(bidiR) && unicodeset.containsNone(bidiL);
 		String dir = rtlOnly ? " dir='rtl'" : "";
 		String headerRow = "<tr><th class='" + title + "'><a href='#" + title + "'>" + title + "</a> ("
 				+ TestData.nf.format(size) + ")</th></tr>";
 		htmlOut.println(headerRow);
 		htmlOut.print("<tr><td class='" + title + "'" + dir + ">");
 		// <a href="#Atomic">categorization</a>
 		textOut.println();
 		textOut.println("# " + title);
 		bf.setValueSource(script + " ; " + title);
 		UnicodeSetIterator usi = new UnicodeSetIterator();
 		usi.reset(unicodeset);
 		boolean listAsRanges = scriptCode == HAN_SCRIPT || scriptCode == HANGUL_SCRIPT;
 		if (listAsRanges) {
 			while (usi.nextRange()) {
 				String cell = formatCode(UTF16.valueOf(usi.codepoint));
 				if (usi.codepoint != usi.codepointEnd) {
 					cell = cell + ".. " + formatCode(UTF16.valueOf(usi.codepointEnd));
 				}
 				htmlOut.print(cell);
 			}
 			bf.showSetNames(textOut, unicodeset);
 		} else {
 			Set reordered = new TreeSet(comparator);
 			while (usi.next()) {
 				String x = usi.getString();
 				// A rejected add means the comparator could not tell x from an earlier member.
 				if (!reordered.add(x)) {
 					throw new IllegalArgumentException("Collision with "
 							+ Default.ucd().getCodeAndName(x));
 				}
 			}
 			Iterator ordered = reordered.iterator();
 			while (ordered.hasNext()) {
 				htmlOut.print(formatCode((String)ordered.next()));
 			}
 			bf.showSetNames(textOut, reordered);
 		}
 		htmlOut.println("</td></tr>");
 	}
 	/**
 	 * Wraps a string in an HTML span whose title is its code and name, padded
 	 * with no-break spaces. When the category of the first code point is Me or
 	 * Mn, the padding is doubled and a dotted circle (U+25CC) is placed before
 	 * the string.
 	 *
 	 * @param string the text to format
 	 * @return the span markup followed by a single space
 	 */
 	private String formatCode(String string) {
 		int cat = ucd.getCategory(UTF16.charAt(string,0));
 		boolean isMark = cat == Me || cat == Mn;
 		String leading = isMark ? "\u00A0\u00A0\u25cc" : "\u00A0";
 		String trailing = isMark ? "\u00A0\u00A0" : "\u00A0";
 		StringBuffer html = new StringBuffer();
 		html.append("<span title='").append(ucd.getCodeAndName(string)).append("'>");
 		html.append(leading);
 		html.append(TransliteratorUtilities.toHTMLControl.transliterate(string));
 		html.append(trailing);
 		html.append("</span> ");
 		return html.toString();
 	}
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java
@ -1,74 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks-old.java,v $
 * $Date: 2005/03/04 02:50:26 $
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.*;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UnicodeSet;
 import java.util.*;
 /**
  * Reads a word list (thai6.ucs) and sorts its characters into those seen in
  * initial, medial and final position, ignoring marks in the Thai block, the
  * characters U+0E40..U+0E44, controls, space and '.'.
  * The three sets are printed with Utility.showSetNames.
  */
 public class GenerateThaiBreaks {
   /**
    * Runs the classification over the hard-coded input file.
    *
    * @param args not used
    * @throws IOException if the input file cannot be opened or read
    */
   public static void main(String [] args) throws IOException {
     BufferedReader br = new BufferedReader(
       new InputStreamReader(
         new FileInputStream("\\icu4j\\src\\data\\thai6.ucs"), "UnicodeLittle"));
     try {
         Main.setUCD();
         UnicodeSet ignorables = new UnicodeSet("[:M:]");
         ignorables.retain(0x0E00, 0x0E7F); // just Thai block
         ignorables.add(0x0E40, 0x0E44); // add logical order exception
         ignorables.add(0, ' '); // add controls
         ignorables.add('.');
         UnicodeSet initials = new UnicodeSet();
         UnicodeSet finals = new UnicodeSet();
         UnicodeSet medials = new UnicodeSet();
         while (true) {
             String line = br.readLine();
             if (line == null) break;
             // find final consonant; stays -1 for a blank or all-ignorable line
             int end = -1;
             for (int i = line.length() - 1; i >= 0; --i) {
                 char c = line.charAt(i);
                 if (!ignorables.contains(c)) {
                     finals.add(c);
                     end = i;
                     break;
                 }
             }
             // Nothing to classify on this line; without this check the scan
             // above would run off the front of the string.
             if (end < 0) continue;
             boolean haveFirst = false;
             for (int i = 0; i < end; ++i) {
                 char c = line.charAt(i);
                 if (ignorables.contains(c)) continue;
                 if (!haveFirst) {
                     initials.add(c);
                     haveFirst = true;
                 } else {
                     medials.add(c);
                 }
             }
         }
         // Keep only the characters that never occur in medial position.
         initials.removeAll(medials);
         finals.removeAll(medials);
         Utility.showSetNames("initials: ", initials, false, Main.ucd);
         Utility.showSetNames("finals: ", finals, false, Main.ucd);
         Utility.showSetNames("medials: ", medials, false, Main.ucd);
     } finally {
         br.close();
     }
   }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks.java
@ -1,135 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateThaiBreaks.java,v $
 * $Date: 2006/09/24 23:32:44 $
 * $Revision: 1.5 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.*;
 import com.ibm.icu.text.UTF16;
 //import com.ibm.text.utility;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.text.utility.Utility;
 //import java.util.*;
 /**
  * Reads a word list (thai6.ucs), strips controls, space and '.', and reports
  * which characters occur in initial, medial and final position, together with
  * the Thai-block characters that never occur at all, never occur initially
  * and never occur finally.
  */
 public class GenerateThaiBreaks {
   /**
    * Runs the analysis over the hard-coded input file and prints the results
    * to System.out.
    *
    * @param args not used
    * @throws IOException if the input file cannot be opened or read
    */
   public static void main(String [] args) throws IOException {
     BufferedReader br = new BufferedReader(
       new InputStreamReader(
         new FileInputStream("c:\\icu4j\\src\\com\\ibm\\icu\\dev\\data\\thai6.ucs"), "UnicodeLittle"));
     PrintWriter out = null;
     try {
         UnicodeSet ignorables = new UnicodeSet(); 
         /* new UnicodeSet(0xE30, 0xE3A);
         ignorables.add(0x0E40, 0x0E44); // add logical order exception
         ignorables.add(0x0E47, 0x0E4E);
         */
         ignorables.add(0, ' '); // add controls
         ignorables.add('.');
         UnicodeSet initials = new UnicodeSet();
         UnicodeSet finals = new UnicodeSet();
         UnicodeSet medials = new UnicodeSet();
         // Growable scratch buffer, so a line of any length is accepted.
         StringBuffer buffer = new StringBuffer();
         while (true) {
             String line = br.readLine();
             if (line == null) break;
             // find 'real' characters
             buffer.setLength(0);
             for (int i = 0; i < line.length(); ++i) {
                 char c = line.charAt(i);
                 if (ignorables.contains(c)) continue;
                 buffer.append(c);
             }
             String temp = buffer.toString();
             // A line with no significant characters contributes nothing; skip it
             // rather than hand an empty string to UnicodeSet.add.
             if (temp.length() == 0) continue;
             if (temp.length() == 1) {
                 // A single character is both the first and the last of its word.
                 initials.add(temp);
                 finals.add(temp);
                 continue;
             }
             initials.add(temp.substring(0,1));
             //initials.add(temp.substring(0,2));
             finals.add(temp.substring(temp.length()-1));
             //finals.add(temp.substring(temp.length()-1));
             for (int i = 1; i < temp.length() - 1; ++i) {
                 //medials.add(temp.substring(i, i+2));
                 medials.add(temp.substring(i, i+1));
             }
             //medials.add(temp.substring(temp.length() - 2, temp.length() - 1));
         }
         System.out.println("initials size: " + initials.size());
         System.out.println("finals size: " + finals.size());
         System.out.println("medials size: " + medials.size());
         //out = Utility.openPrintWriter("ThaiData.txt", Utility.UTF8_WINDOWS);
        // out.write('\uFEFF');
         // Mn/Me characters of the Thai block are always counted as possible finals.
         UnicodeSet marks = new UnicodeSet("[[\u0e00-\u0e7f]&[[:mn:][:me:]]]");
         finals.addAll(marks);
         UnicodeSet all = new UnicodeSet(initials).addAll(medials).addAll(finals);
         UnicodeSet missingThai = new UnicodeSet("[[\u0e00-\u0e7f]-[:Cn:]]").removeAll(all);
         System.out.println("Never occur: " + missingThai.toPattern(true));
         Utility.showSetNames("", missingThai, true, Default.ucd());
         System.out.println();
         UnicodeSet neverInitial = new UnicodeSet(all).removeAll(initials);
         UnicodeSet neverFinal = new UnicodeSet(all).removeAll(finals);
         System.out.println("Never initial: " + neverInitial.toPattern(true));
         Utility.showSetNames("", neverInitial, true, Default.ucd());
         System.out.println();
         System.out.println("Never final: " + neverFinal.toPattern(true));
         Utility.showSetNames("", neverFinal, true, Default.ucd());
         System.out.println();
         // From here on the sets hold only characters that are never medial.
         initials.removeAll(medials);
         finals.removeAll(medials);
         System.out.println("initials size: " + initials.size());
         System.out.println("finals size: " + finals.size());
         System.out.println("Only Initials" + initials.toPattern(true));
         Utility.showSetNames("", initials, true, Default.ucd());
         System.out.println();
         System.out.println("Only Finals" + finals.toPattern(true));
         Utility.showSetNames("", finals, true, Default.ucd());
     } finally {
         br.close();
         if (out != null) out.close();
     }
   }
   /**
    * Breaker that starts a new output line ("\r\n") whenever the first code
    * point of the current item differs from that of the previous item.
    * Not referenced by main() in this file.
    */
   static class MyBreaker implements Utility.Breaker {
         /**
          * @param current the item being printed
          * @param old     the previously printed item, or null for the first one
          * @return the text to emit for current
          */
         public String get(Object current, Object old) {
           if (old == null || UTF16.charAt(current.toString(), 0) == UTF16.charAt(old.toString(), 0)) {
             // NOTE(review): the code shown here skips the first char of current, and the
             // output ends in "))" -- confirm both are intended.
             return current.toString() + "(" + Default.ucd().getCode(current.toString().substring(1)) + "))";
           } else {
             return "\r\n" + current + "(" + Default.ucd().getCode(current.toString()) + "))";
           }
         }
         /** Accepts every item. */
         public boolean filter(Object current) { return true; }
   }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/IANANames.java
+++ b/tools/unicodetools/com/ibm/text/UCD/IANANames.java
@ -1,177 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/IANANames.java,v $
 * $Date: 2002/10/05 01:28:58 $
 * $Revision: 1.2 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.icu.lang.UCharacter;
 import java.util.*;
 import java.text.NumberFormat;
 import java.io.*;
 /**
  * Loads the IANA character-set registry file (character-sets.txt) and records,
  * for every "Name:" and "Alias:" entry, the name of the charset it belongs to.
  */
 public class IANANames implements UCD_Types {
     // alias -> name from the most recent "Name:" line before it
     private Map aliasToBase = new TreeMap();
     // alias -> trailing comment on its line, when there is one
     private Map aliasToComment = new TreeMap();
     // alias -> the full registry line it was read from
     private Map aliasToLine = new TreeMap();
     /**
      * Reports aliases that become identical once case and non-alphanumeric
      * characters are ignored, aliases longer than 40 characters or containing
      * parentheses, the maximum alias length, and every character that was
      * stripped while building the skeletons. Output goes to System.out.
      *
      * @throws IOException if the registry file cannot be read
      */
     public static void testSensitivity() throws IOException {
         IANANames iNames = new IANANames();
         Map m = new HashMap();
         Iterator it = iNames.getIterator();
         UnicodeSet removed = new UnicodeSet();
         int maxLength = 0;
         while (it.hasNext()) {
             String alias = (String) it.next();
             if (maxLength < alias.length()) maxLength = alias.length();
             if (alias.length() > 40) System.out.println("Name >40: " + alias);
             if (alias.indexOf(')') >= 0 || alias.indexOf('(') >= 0) System.out.println("Illegal tag: " + alias);
             String skeleton = removeNonAlphanumeric(alias, removed);
             String other = (String) m.get(skeleton);
             if (other != null) {
                 String base = iNames.getBase(alias);
                 String otherBase = iNames.getBase(other);
                 if (!base.equals(otherBase)) {
                     System.out.println("Collision between: " + alias + " (" + base + ") and " 
                         + other + " (" + otherBase + ")");
                 } else {
                     System.out.println("Alias Variant: " + alias + " and " + other + " (" + base + ")");
                 }
             } else {
                 m.put(skeleton, alias);
             }
         }
         System.out.println("Max Length: " + maxLength);
         System.out.println("Characters removed: ");
         UnicodeSetIterator usi = new UnicodeSetIterator(removed);
         while (usi.next()) {
             char c = (char) usi.codepoint; // safe, can't be supplementary
             // Print the code point in hex so that it matches the "0x" prefix.
             System.out.println("0x" + Integer.toHexString(usi.codepoint).toUpperCase(Locale.ENGLISH)
                 + "\t'" + c + "'\t" + UCharacter.getName(usi.codepoint));
         }
     }
     /**
      * Reads the registry file. Everything up to the first line starting with
      * a row of dashes is skipped, and reading stops at the "REFERENCES" line.
      *
      * @throws IOException if the registry file cannot be opened or read
      */
     public IANANames() throws IOException {
         BufferedReader in = Utility.openReadFile(BASE_DIR + "IANA\\character-sets.txt", Utility.LATIN1);
         try {
             boolean atStart = true;
             String lastName = "";
             int counter = 0;
             while (true) {
                 String line = in.readLine();
                 if (line == null) break;
                 counter++;
                 if (atStart) {
                     if (line.startsWith("-------------")) atStart = false;
                     continue;
                 }
                 if (line.trim().length() == 0) continue;
                 if (line.startsWith("Name:") || line.startsWith("Alias:")) {
                     lastName = add(line, lastName, counter);
                 } else if (line.startsWith("Source:") || line.startsWith("MIBenum:") 
                         || line.startsWith("        ")) {
                     continue;
                 } else if (line.equals("REFERENCES")) {
                     break;
                 } else {
                     System.out.println("Unknown Line: " + line);
                 }
             }
         } finally {
             in.close();
         }
     }
     /**
      * Records one "Name:" or "Alias:" line.
      *
      * @param line     the registry line
      * @param baseName the name from the most recent "Name:" line
      * @param counter  line number, used only in messages
      * @return the base name now in effect: the alias on this line when it is
      *         a "Name:" line, otherwise baseName unchanged
      * @throws IllegalArgumentException if the line has no ": " separator
      */
     private String add(String line, String baseName, int counter) {
         // extract the alias, doing a little validity check
         int pos = line.indexOf(": ");
         if (pos < 0) throw new IllegalArgumentException("Bad line: " + counter + " '" + line + "'");
         String alias = line.substring(pos+2).trim();
         // get comment
         String comment = null;
         pos = alias.indexOf(' ');
         if (pos >= 0) {
             comment = alias.substring(pos).trim();
             alias = alias.substring(0, pos);
         }
         // reset the baseName if we are a name
         if (line.startsWith("Name:")) {
             baseName = alias;
         }
         // store
         if (!alias.equals("None")) {
             if (false) {
                 if (baseName.equals(alias)) System.out.println();
                 System.out.println("Adding " + alias + "\t=> " + baseName + (comment != null ? "\t(" + comment + ")" : ""));
             }
             // check if it is stored already
             String oldbaseName = (String) aliasToBase.get(alias);
             if (oldbaseName != null) {
                 System.out.println("Duplicate alias (" + alias + ", " + oldbaseName + ", " + baseName + "): "
                     + counter + " '" + line + "'");
             }
             aliasToBase.put(alias, baseName);
             if (comment != null) aliasToComment.put(alias, comment);
             // Keep the source line itself; the comment is already stored above.
             aliasToLine.put(alias, line);
         }
         return baseName;
     }
     /**
      * @return an iterator over every recorded alias, in the key order of the
      *         underlying TreeMap
      */
     public Iterator getIterator() {
         return aliasToBase.keySet().iterator();
     }
     /**
      * Returns the name for this alias, or null if there is none.
      */
     public String getBase(String alias) {
         return (String) aliasToBase.get(alias);
     }
     /**
      * Builds a comparison skeleton: the string is uppercased (English rules)
      * and reduced to the characters A-Z and 0-9. A '0' is dropped as well when
      * the previously kept character is a letter.
      *
      * @param s       the alias to reduce
      * @param removed collects every character that was dropped; the first time
      *                a character is dropped a message is printed to System.out
      * @return the skeleton
      */
     public static String removeNonAlphanumeric(String s, UnicodeSet removed) {
         s = s.toUpperCase(Locale.ENGLISH); // can't have Turkish!
         StringBuffer result = new StringBuffer();
         for (int i = 0; i < s.length(); ++i) {
             char c = s.charAt(i);
             if (c == '0') {
                 // With nothing kept yet, cLast defaults to a digit, so a leading zero is kept.
                 char cLast = result.length() > 0 ? result.charAt(result.length() - 1) : '0';
                 if ('0' <= cLast && cLast <= '9') {
                     result.append(c);
                 } else {
                     if (!removed.contains(c)) {
                         System.out.println("Removed '" + c + "' from " + s + " => " + result);
                         removed.add(c);
                     }
                 }
             } else if (('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')) {
                 result.append(c);
             } else {
                 if (!removed.contains(c)) {
                     System.out.println("Removed '" + c + "' from " + s + " => " + result);
                     removed.add(c);
                 }
             }
         }
         return result.toString();
     }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/IDNTester.java
+++ b/tools/unicodetools/com/ibm/text/UCD/IDNTester.java
@ -1,142 +0,0 @@
 package com.ibm.text.UCD;
 import java.io.IOException;
 import java.io.PrintWriter;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.impl.PrettyPrinter;
 import com.ibm.icu.text.IDNA;
 import com.ibm.icu.text.StringPrepParseException;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.text.utility.Utility;
 /**
  * Compares the characters IDNA accepts with the characters accepted by an
  * identifier-based profile (Mod_ID_Continue_NO_Cf plus '-', minus a few
  * oddballs) under Unicode 3.2 and 5.0, and writes the resulting sets to
  * idnCount.html.
  */
 public class IDNTester {
 	// Scratch buffers shared by every getIDNAType call.
 	static StringBuffer inbuffer = new StringBuffer();
 	static StringBuffer intermediate, outbuffer;
 	// Type codes returned by getIDNAType; IDNA_TYPE_LIMIT is the number of codes.
 	static final int OK = 0, DELETED = 1, ILLEGAL = 2, REMAPPED = 3, IDNA_TYPE_LIMIT = 4;
 	// Code points getIDNAType classifies as DELETED or REMAPPED.
 	static UnicodeSet IDNInputOnly = new UnicodeSet();
 	// Code points getIDNAType classifies as OK.
 	static UnicodeSet IDNOutput = new UnicodeSet();
 	static boolean initialized = false;
 	// Results of splitSet for the Unicode 3.2 and 5.0 identifier sets.
 	static UnicodeSet IDInputOnly32 = new UnicodeSet();
 	static UnicodeSet IDOutput32 = new UnicodeSet();
 	static UnicodeSet IDInputOnly50 = new UnicodeSet();
 	static UnicodeSet IDOutput50 = new UnicodeSet();
 	static PrettyPrinter pp = new PrettyPrinter();
 	static PrintWriter pw;
 	/**
 	 * Builds all sets and writes them, with their sizes, to idnCount.html.
 	 *
 	 * @param args not used
 	 * @throws IOException if the output file cannot be opened
 	 */
 	public static void main(String[] args) throws IOException {
 		initialize();
 		pw = BagFormatter.openUTF8Writer(Utility.GEN_DIR, "idnCount.html");
 		try {
 			pw.println("<html><body>");
 			showSet("IDN InputOnly: ", IDNInputOnly);
 			showSet("IDN Output: ", IDNOutput);
 			showSet("ID InputOnly, U3.2: ", IDInputOnly32);
 			showSet("ID Output, U3.2: ", IDOutput32);
 			showSet("IDN Output - ID Output, U3.2: ", new UnicodeSet(IDNOutput).removeAll(IDOutput32));
 			showSet("IDN Output & ID Output, U3.2: ", new UnicodeSet(IDNOutput).retainAll(IDOutput32));
 			showSet("ID Output - IDN Output, U3.2: ", new UnicodeSet(IDOutput32).removeAll(IDNOutput));
 			showSet("ID InputOnly, U5.0: ", IDInputOnly50);
 			showSet("ID Output, U5.0: ", IDOutput50);
 			showSet("ID Output, U5.0 - U3.2: ", new UnicodeSet(IDOutput50).removeAll(IDOutput32));
 			pw.println("</body></html>");
 		} finally {
 			// Close the file even when one of the listings fails.
 			pw.close();
 		}
 	}
 	/**
 	 * Writes a heading with the title and set size, then the set's pattern, to pw.
 	 *
 	 * @param title heading text; the set size is appended to it
 	 * @param set   the set to show
 	 */
 	public static void showSet(String title, UnicodeSet set) {
 		pw.println("<h2>" + title + set.size() + "</h2>" + "<p>" + pp.toPattern(set) + "</p>");
 		pw.println();
 	}
 	/**
 	 * @return the code points IDNA accepts as input only (deleted or remapped),
 	 *         building the sets on first use
 	 */
 	static UnicodeSet getIDNInput() {
 		if (!initialized) initialize();
 		return IDNInputOnly;
 	}
 	/**
 	 * @return the code points IDNA leaves unchanged (type OK), building the
 	 *         sets on first use
 	 */
 	static UnicodeSet getIDNOutput() {
 		if (!initialized) initialize();
 		return IDNOutput;
 	}
 	/**
 	 * Scans every code point once and fills the six result sets. Progress is
 	 * printed to System.out every 0x1000 code points.
 	 */
 	private static void initialize() {
 		UnicodeSet oddballs = new UnicodeSet("[\u034F \u180B-\u180D \uFE00-\uFE0F _]");
 		UCD U32 = UCD.make("3.2.0");
 		Normalizer nfkc32 = new Normalizer(Normalizer.NFKC, "3.2.0");
 		UCDProperty xid32 = DerivedProperty.make(UCD.Mod_ID_Continue_NO_Cf,U32);
 		UnicodeSet IDInput32 = xid32.getSet();
 		IDInput32.add('-').removeAll(oddballs);
 		UCD U50 = UCD.make("5.0.0");
 		Normalizer nfkc50 = new Normalizer(Normalizer.NFKC, "5.0.0");
 		UCDProperty xid50 = DerivedProperty.make(UCD.Mod_ID_Continue_NO_Cf,U50);
 		UnicodeSet IDInput50 = xid50.getSet();
 		IDInput50.add('-').removeAll(oddballs);
 		// Inclusive upper bound: U+10FFFF is the last code point.
 		for (int i = 0; i <= 0x10FFFF; ++i) {
 			if ((i & 0xFFF) == 0) {
 				System.out.println(i);
 				System.out.flush();
 			}
 			int type = getIDNAType(i);
 			if (type == OK) {
 				IDNOutput.add(i);
 			} else if (type != ILLEGAL) {
 				IDNInputOnly.add(i);
 			}
 			if (IDInput32.contains(i)) {
 				splitSet(IDInputOnly32, IDOutput32, U32, nfkc32, i);
 			}
 			if (IDInput50.contains(i)) {
 				splitSet(IDInputOnly50, IDOutput50, U50, nfkc50, i);
 			}
 		}
 		initialized = true;
 	}
 	/**
 	 * Adds i to outputSet when it is below 0x7F, or when it is unchanged by
 	 * full case folding, then NFKC, then case folding again; otherwise adds it
 	 * to inputOnlySet.
 	 *
 	 * @param inputOnlySet receives code points changed by one of the steps
 	 * @param outputSet    receives code points that survive every step
 	 * @param ucd          source of the case folding
 	 * @param nfkc         the NFKC normalizer to apply
 	 * @param i            the code point to classify
 	 */
 	private static void splitSet(UnicodeSet inputOnlySet, UnicodeSet outputSet, UCD ucd, Normalizer nfkc, int i) {
 		if (i < 0x7F) {
 			outputSet.add(i);
 			return;
 		}
 		String v = UTF16.valueOf(i);
 		String s = ucd.getCase(i, UCD.FULL, UCD.FOLD);
 		if (s.equals(v)) {
 			s = nfkc.normalize(s);
 			if (s.equals(v)) {
 				s = ucd.getCase(s, UCD.FULL, UCD.FOLD);
 				if (s.equals(v)) {
 					outputSet.add(i);
 					return;
 				}
 			}
 		}
 		inputOnlySet.add(i);
 	}
 	/**
 	 * Classifies one code point by converting it to ASCII with IDNA and back.
 	 * '-' is always reported as OK without being converted.
 	 *
 	 * @param cp the code point to classify
 	 * @return DELETED when the ASCII form is empty, ILLEGAL when either
 	 *         conversion throws, REMAPPED when TestData.equals reports that the
 	 *         round trip differs from the input, otherwise OK
 	 */
 	static public int getIDNAType(int cp) {
 		if (cp == '-') return OK;
 		inbuffer.setLength(0);
 		UTF16.append(inbuffer, cp);
 		try {
 			intermediate = IDNA.convertToASCII(inbuffer,
 					IDNA.DEFAULT); // USE_STD3_RULES
 			if (intermediate.length() == 0)
 				return DELETED;
 			outbuffer = IDNA.convertToUnicode(intermediate,
 					IDNA.USE_STD3_RULES);
 		} catch (StringPrepParseException e) {
 			return ILLEGAL;
 		} catch (Exception e) {
 			System.out.println("Failure at: " + Utility.hex(cp));
 			return ILLEGAL;
 		}
 		if (!TestData.equals(inbuffer, outbuffer))
 			return REMAPPED;
 		return OK;
 	}
 }
--- a/tools/unicodetools/com/ibm/text/UCD/IntMap.java
+++ b/tools/unicodetools/com/ibm/text/UCD/IntMap.java
@ -1,37 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/IntMap.java,v $
 * $Date: 2003/03/18 00:10:47 $
 * $Revision: 1.1 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.HashMap;
 /**
  * A map from int keys to objects, backed by a HashMap of boxed keys. The
  * smallest and largest key ever stored are tracked so that a lookup outside
  * that range can answer without touching the HashMap.
  */
 public class IntMap {
     // Smallest and largest key passed to put() so far.
     int lowest = Integer.MAX_VALUE;
     int highest = Integer.MIN_VALUE;
     HashMap store = new HashMap();
     /**
      * @param key the key to look up
      * @return the value stored under key, or null when there is none
      */
     public Object get(int key) {
         boolean withinSeenRange = lowest <= key && key <= highest;
         return withinSeenRange ? store.get(new Integer(key)) : null;
     }
     /**
      * Stores value under key, replacing any previous value for that key.
      *
      * @param key   the key
      * @param value the value to store
      */
     public void put(int key, Object value) {
         lowest = Math.min(lowest, key);
         highest = Math.max(highest, key);
         store.put(new Integer(key), value);
     }
     /**
      * @return the number of distinct keys stored
      */
     public int size() {
         return store.size();
     }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/InvariantTest.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/InvariantTest.txt
@ -1,92 +0,0 @@
 Show [[:block=tamil:] & [:age=3.2:] - [:age=3.1:]]
 Show [[:block=tamil:] & [:age=4.0:] - [:age=3.2:]]
 Show [[:block=tamil:] & [:age=4.1:] - [:age=4.0:]]
 Show [[:block=tamil:] & [:age=5.0:] - [:age=4.1:]]
 Stop
 Show [[:NFKCQuickCheck=No:] & [$gc:Lm]]
 Stop
 [$Name:  $gc:Sk]
 [$Name:  $gc:Lm]
 Show [[$whitespace] - [$gc:zs]]
 Show [[$gc:zs] - [$whitespace]]
 Let $letter = [$gc:Lu $gc:Ll $gc:Lt $gc:Lo $gc:Lm];
 Let $number = [$gc:Nd $gc:Nl $gc:No]
 Let $mark = [$gc:mn $gc:me $gc:mc]
 Let $LMN = [$letter $number $mark]
 Let $gcAllPunctuation = [$gc:Open_Punctuation $gc:Close_Punctuation $gc:Dash_Punctuation $gc:Connector_Punctuation $gc:Other_Punctuation $gc:Initial_Punctuation $gc:Final_Punctuation]
 Let $gcAllSymbols = [$gc:Currency_Symbol $gc:Modifier_Symbol $gc:Math_Symbol $gc:Other_Symbol]
 Let $nfc = [^$NFC_Quick_Check:No]
 Show $nfc
 Show [$alphabetic - [$mark $letter $number]]
 Let $oldCJK = [\u1100-\u11FF \u3040-\u30FF \u3130-\u318F \u31F0-\u31FF \u3400-\u4DBF \u4E00-\u9FFF \uAC00-\uD7AF \uF900-\uFAFF \uFF65-\uFFDC]
 Show [$oldCJK & $gc:cn]
 Let $fixedOld = [$oldCJK-$gc:cn]
 #List the non-alphabetic old items
 #Show [$oldCJK-$gc:cn-$alphabetic]
 #Check for differences
 #Test $fixedOld = $trialNew
 #ShowEach $mark
 Let $uax29_outliers = [\u3031-\u3035 \u309B-\u309C \u30A0 \u30FC \uFF70 \uFF9E-\uFF9F]
 Let $other_outliers = [\u3099-\u309A \u3006 \u303C \u302A-\u302E \u302F \U000E0100-\U000E01EF]
 # ==========================================
 # Outliers from UAX29
 Show $uax29_outliers
 # Additional outliers
 Show $other_outliers
 # Take the 5 CJK scripts
 Let $trialScripts = [$script:hani $script:hang $script:kana $script:hira $script:bopo]
 # Remove the non-LMN
 Let $trialNewBase = [$trialScripts & $LMN]
 # Add the outliers
 Let $trialNew = [$trialNewBase $uax29_outliers $other_outliers]
 # Show our result
 Show $trialNew
 # As a double-check, show script characters we're tossing
 Show [$trialScripts - $trialNew]
 # Compare snippets stuff
 Let $guessClose = [$lb:QU $lb:Close_Punctuation]
 Let $__closing_punc = ["')>\]`\}\u00AB\u00BB\u2018\u2019\u201C\u201D\u2039\u203A\u207E\u208E\u27E7\u27E9\u27EB\u2984\u2986\u2988\u298A\u298C\u298E\u2990\u2992\u2994\u2996\u2998\u29D9\u29DB\u29FD\u3009\u300B\u300D\u300F\u3011\u3015\u3017\u3019\u301B\u301E\u301F\uFD3F\uFE42\uFE44\uFE5A\uFE5C\uFF02\uFF07\uFF09\uFF3D\uFF5D\uFF63]
 $guessClose = $__closing_punc
 Let $guessClose = [$gc:pf $gc:pe $gc:pi]
 $guessClose = $__closing_punc
 Let $guessTerm = [$sb:aterm $sb:sterm]
 $guessTerm = [? ? !?? ? ? ? ? ??? ? ? ? ? ? ? ? .?? <20> ? ? ? ? ? ? ? ?? ? ? ? ? ? ? ?]
 Let $__issymotherr = [\u00A6\u00A7\u06FD\u06FE\u0F01-\u0F03\u0F13-\u0F17\u0F1A-\u0F1F\u0FBE-\u0FC5\u0FC7-\u0FCC\u2100\u2101\u2104-\u2106\u2108\u2109\u2117\u2118\u211E-\u2121\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u2400-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u2613\u2619-\u266E\u2670\u2671\u2701-\u2704\u2706-\u2709\u270C-\u2727\u2729-\u274B\u274F-\u2752\u2758-\u275E\u2761-\u2794\u2798-\u27AF\u27B1-\u27BE\u2800-\u28FF\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3012\u3013\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u3200-\u321C\u322A-\u3243\u3260-\u327B\u328A-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uA490-\uA4A1\uA4A4-\uA4B3\uA4B5-\uA4C0\uA4C2-\uA4C4\uFFED\uFFEE\uFFFC\uFFFD]
 Let $__issymothers = [\u00B6\u0482\u06E9\u09FA\u0B70\u0F34\u0F36\u0F38\u0FCF\u2114\u2123\u2125\u2127\u2129\u212E\u2132\u213A\u21D3\u220E\u2617\u274D\u2756\u3004\u3020\u327F\uA4C6\uFFE4\uFFE8]
 Let $symOther = [$__issymotherr $__issymothers]
 $symOther = $gcAllSymbols
 [$symOther & $nfc] = [$gcAllSymbols & $nfc]
--- a/tools/unicodetools/com/ibm/text/UCD/ListNFComplete.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ListNFComplete.java
@ -1,18 +0,0 @@
 package com.ibm.text.UCD;
 /**
  * Placeholder for a tool that lists the characters meeting all of the
  * criteria below for a given normalization form. Nothing is implemented yet.
  * <ol>
  * <li>the character is not decomposed by the normalization form;</li>
  * <li>the character has combining class 0;</li>
  * </ol>
  * and, for the composing forms NFC and NFKC, additionally:
  * <ol start="3">
  * <li>it can never compose with a previous character;</li>
  * <li>it can never compose with a following character;</li>
  * <li>it can never change when another character is added. Example: a-breve
  *     might satisfy the first four criteria, but adding an ogonek turns it
  *     into a-ogonek + breve.</li>
  * </ol>
  */
 public class ListNFComplete {
    /**
     * Entry point; currently does nothing.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Not implemented. The intended first step is still disabled:
        //Normalizer nfd = new Normalizer(Normalizer.NFD);
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/MLStreamWriter.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MLStreamWriter.java
@ -1,327 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MLStreamWriter.java,v $
 * $Date: 2003/04/25 01:39:15 $
 * $Revision: 1.4 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.*;
 import java.util.*;
 import com.ibm.text.UCD.*;
 /**
  * A {@link Writer} that produces HTML or XML markup on a {@link PrintWriter}
  * through a small fluent API: <code>el</code> opens an element,
  * <code>at</code> adds an attribute to the element whose start tag is still
  * open, <code>tx</code> writes escaped text and <code>cl</code> closes the
  * innermost element. Open elements are kept on a stack so that
  * {@link #close()} can close whatever is still pending. The pseudo element
  * name <code>"!--"</code> opens a comment, which is closed with "--&gt;".
  * Text and attribute values are normalized and escaped by {@link #quoted}.
  */
 public class MLStreamWriter extends Writer {
    public static final String copyright =
      "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";

    /**
     * @param output destination for the markup
     * @param HTML   true for HTML output; false makes {@link #quoted} escape
     *               the apostrophe as well
     */
    public MLStreamWriter (PrintWriter output, boolean HTML) {
        out = output;
        isHTML = HTML;
    }

    /**
     * Creates a writer in HTML mode.
     *
     * @param output destination for the markup
     */
    public MLStreamWriter (PrintWriter output) {
        this(output,true);
    }

    /**
     * Opens an element: finishes any pending start tag, writes
     * "&lt;" plus the name, and pushes the name on the element stack. The
     * start tag stays open so that attributes can follow.
     *
     * @param elementName element name, or "!--" to start a comment
     * @return this writer
     */
    public MLStreamWriter el(String elementName) {
        closeIfOpen();
        print('<', AFTER);
        // A comment opener always forces a line break after itself.
        print(elementName, elementName.equals("!--") ? AFTER+FORCE : AFTER);
        stack.add(elementName);
        inElement = true;
        return this;
    }

    /**
     * Writes the "&gt;" that ends a pending start tag, if there is one.
     * Nothing is written for a comment opener, which has no closing bracket.
     */
    private MLStreamWriter closeIfOpen() {
        if (inElement && !"!--".equals(stack.get(stack.size()-1))) {
            print('>',BEFORE+FORCE);
        }
        inElement = false;
        return this;
    }

    /**
     * Closes the innermost element and then writes the argument as text.
     * NOTE(review): the parameter name suggests the intent may have been to
     * open a new element (el) rather than write text (tx); behavior is left
     * as it was -- confirm against callers before changing.
     *
     * @param elementName string written as text after closing
     * @return this writer
     */
    final public MLStreamWriter cel(String elementName) {
        return cl().tx(elementName);
    }

    /**
     * Adds an attribute to the element whose start tag is still open.
     *
     * @param attributeName  attribute name, written as is
     * @param attributeValue attribute value, escaped with {@link #quoted}
     * @return this writer
     * @throws IllegalArgumentException if no start tag is currently open
     */
    public MLStreamWriter at(String attributeName, String attributeValue) {
        if (!inElement) {
            throw new IllegalArgumentException("attribute \"" + attributeName + "\" not in element");
        }
        print(' ', BOTH);
        print(attributeName, AFTER);
        print('=', AFTER);
        print('"');
        print(quoted(attributeValue));
        print('"', AFTER);
        return this;
    }

    /**
     * Adds an attribute whose value is the decimal form of an int.
     *
     * @param attributeName attribute name
     * @param value         attribute value
     * @return this writer
     */
    public MLStreamWriter at(String attributeName, int value) {
        return at(attributeName, String.valueOf(value));
    }

    /**
     * Finishes any pending start tag and writes a line terminator.
     *
     * @return this writer
     */
    public MLStreamWriter CR() {
        closeIfOpen();
        out.println();
        return this;
    }

    /**
     * Finishes any pending start tag and writes escaped text.
     *
     * @param text text to write; escaped with {@link #quoted}
     * @return this writer
     */
    public MLStreamWriter tx(String text) {
        closeIfOpen();
        print(quoted(text));
        return this;
    }

    /** Writes a single char as escaped text. */
    final public MLStreamWriter tx(char text) {
        return tx(String.valueOf(text));
    }

    /** Writes the decimal form of an int as text. */
    final public MLStreamWriter tx(int text) {
        return tx(String.valueOf(text));
    }

    /** Writes the space-separated 4-digit hex form of each char of a string. */
    final public MLStreamWriter tx16(String text) {
        return tx(hex(text));
    }

    /** Writes the 4-digit hex form of a char. */
    final public MLStreamWriter tx16(char text) {
        return tx(hex(text));
    }

    /** Writes the 8-digit hex form of an int. */
    final public MLStreamWriter tx16(int text) {
        return tx(hex(text));
    }

    /**
     * Closes the innermost open element, writing its end tag (or "--&gt;"
     * for a comment).
     *
     * @param closingElement expected name of the element being closed, or
     *                       null to close whatever is innermost
     * @return this writer
     * @throws IllegalStateException    if no element is open
     * @throws IllegalArgumentException if <code>closingElement</code> is not
     *         the innermost element; that element has been popped by then
     */
    public MLStreamWriter cl(String closingElement) {
        closeIfOpen();
        if (stack.isEmpty()) {
            // Fail with a readable message instead of a bare index error.
            throw new IllegalStateException("no open element to close"
                + (closingElement == null ? "" : " (closing \"" + closingElement + "\")"));
        }
        String lastElement = stack.remove(stack.size()-1);
        if (closingElement != null && !closingElement.equals(lastElement)) {
            throw new IllegalArgumentException("mismatch when closing \"" + closingElement
                + "\", current active element is \"" + lastElement + "\"");
        }
        if (lastElement.equals("!--")) {// hack for XML/HTML
            print("-->",BEFORE+FORCE);
        } else {
            print("</");
            print(lastElement);
            print('>',BEFORE);
        }
        return this;
    }

    /** Closes the innermost open element, whatever its name. */
    final public MLStreamWriter cl() {
        return cl(null);
    }

    /**
     * Closes every element that is still open, innermost first.
     *
     * @return this writer
     */
    public MLStreamWriter closeAllElements() {
        while (!stack.isEmpty()) {
            cl(null);
        }
        return this;
    }

    // stream stuff

    /**
     * Writes a range of chars as escaped text, after finishing any pending
     * start tag. The line-length counter is not updated by this path.
     */
    @Override
    public void write(char[] source, int start, int len) {
        closeIfOpen();
        // later make more efficient!!
        out.print(quoted(new String(source, start, len)));
    }

    /** Closes all open elements, then closes the underlying writer. */
    @Override
    public void close() {
        closeAllElements();
        out.close();
    }

    /** Flushes the underlying writer. */
    @Override
    public void flush() {
        out.flush();
    }

    // Utility methods

    /**
     * Writes a table cell showing a character, a line break, its hex code in
     * a "tt" element and an optional category, then closes the three
     * elements it opened.
     *
     * @param ch        text shown in the cell
     * @param type      element name, optionally followed by ".className",
     *                  which becomes a class attribute
     * @param codepoint string whose chars are shown in hex; null means
     *                  <code>ch</code>
     * @param cat       extra text appended after the hex code, or null
     * @return this writer
     */
    final public MLStreamWriter cell(String ch, String type, String codepoint, String cat) {
        if (codepoint == null) codepoint = ch;
        int dotpos = type.indexOf('.');
        if (dotpos == -1) el(type);
        else {
            el(type.substring(0,dotpos));
            at("class",type.substring(dotpos+1));
        }
        tx(ch).el("br").el("tt").tx16(codepoint);
        if (cat != null) tx(" ").tx(cat);
        // Close "tt", "br" and the cell element itself.
        cl().cl().cl();
        return this;
    }

    /** Writes a "td" cell for <code>ch</code>. */
    final public MLStreamWriter cell(String ch) {
        return cell(ch,"td",null,null);
    }

    /** Writes a cell of the given type for <code>ch</code>. */
    final public MLStreamWriter cell(String ch, String type) {
        return cell(ch,type,null,null);
    }

    /** Writes a cell of the given type with an explicit hex source string. */
    final public MLStreamWriter cell(String ch, String type, String codepoint) {
        return cell(ch,type,codepoint,null);
    }

    /**
     * Supplies an uppercase hex representation of an integer (treated as
     * unsigned, without 0x), zero-padded on the left to at least
     * <code>width</code> digits. A value that needs more digits than
     * <code>width</code> is returned unpadded and untruncated.
     *
     * @param i     value to format
     * @param width minimum number of digits
     */
    static public String hex(int i, int width) {
        String digits = Long.toString(i & 0xFFFFFFFFL, 16).toUpperCase();
        if (digits.length() >= width) {
            return digits;
        }
        StringBuilder result = new StringBuilder(width);
        for (int pad = width - digits.length(); pad > 0; --pad) {
            result.append('0');
        }
        return result.append(digits).toString();
    }

    /**
     * Supplies a zero-padded hex representation of an integer (without 0x)
     */
    static public String hex(int i) {
        return hex(i,8);
    }

    /**
     * Supplies a zero-padded hex representation of a Unicode character (without 0x, \\u)
     */
    static public String hex(char i) {
        return hex(i,4);
    }

    /**
     * Supplies a zero-padded hex representation of a Unicode String (without 0x, \\u)
     *@param sep can be used to give a sequence, e.g. hex("ab", ",") gives "0061,0062"
     */
    static public String hex(String s, String sep) {
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < s.length(); ++i) {
            if (i != 0) result.append(sep);
            result.append(hex(s.charAt(i)));
        }
        return result.toString();
    }

    /** Same as {@link #hex(String, String)} with a single space as separator. */
    static public String hex(String s) {
        return hex(s," ");
    }

    /**
     * Writes a small-font author credit: a link to <code>url</code> labelled
     * <code>name</code>, followed by a script that prints the document's
     * last-modified date, all inside square brackets.
     *
     * @param name link text
     * @param url  link target
     */
    public void author(String name, String url) {
        el("font").at("size","-3").tx("[").el("a").at("href",url).tx(name).cl("a").el("script").el("!--");
        tx("document.write(', ', document.lastModified);");
        cl("!--").cl("script").tx("]").cl("font");
    }

    // ================== PRIVATES =================
    PrintWriter out;
    boolean isHTML;
    /** Names of the currently open elements, innermost last. */
    ArrayList<String> stack = new ArrayList<String>();
    /** True while a start tag has been begun but its "&gt;" not yet written. */
    boolean inElement = false;
    Normalizer formC = new Normalizer(Normalizer.NFC, "");
    /** Number of chars written on the current line through the print methods. */
    int len;
    int maxLineLength = 60;
    // later, add better line end management, indenting
    static final int NONE=0, BEFORE=1, AFTER=2, BOTH=3, FORCE = 4; // chosen for bits!!

    /** Prints a string with no line-break opportunity. */
    final void print(String s) {
        print(s,NONE);
    }

    /** Prints a char with no line-break opportunity. */
    final void print(char c) {
        print(c,NONE);
    }

    /**
     * Prints a string, offering a line break before and/or after it
     * according to the BEFORE, AFTER and FORCE bits of <code>doesBreak</code>.
     */
    final void print(String s, int doesBreak) {
        if ((doesBreak & BEFORE) != 0) tryBreak(s.length(), doesBreak);
        len += s.length();
        out.print(s);
        if ((doesBreak & AFTER) != 0) tryBreak(0, doesBreak);
    }

    /** Char variant of {@link #print(String, int)}. */
    final void print(char c, int doesBreak) {
        if ((doesBreak & BEFORE) != 0) tryBreak(1, doesBreak);
        ++len;
        out.print(c);
        if ((doesBreak & AFTER) != 0) tryBreak(0, doesBreak);
    }

    /**
     * Starts a new line when FORCE is set or when adding <code>toAdd</code>
     * chars would exceed <code>maxLineLength</code>. The new line is
     * indented by one space per open element.
     */
    void tryBreak(int toAdd, int doesBreak) {
        if ((doesBreak & FORCE) != 0 || (len + toAdd) > maxLineLength) {
            out.println();
            len = stack.size();
            for (int i = 0; i < len; ++i) out.print(' ');
        }
    }

    /**
     * Normalizes a string with <code>formC</code> and escapes it for markup.
     * The characters &quot;, &lt;, &amp; and &gt; become entities, and so
     * does the apostrophe when not in HTML mode. Newline, carriage return
     * and tab pass through. Other control characters, unpaired surrogates,
     * U+FFFE and U+FFFF are replaced by U+FFFD. A well-formed surrogate pair
     * is copied unchanged so supplementary characters survive.
     *
     * @param source text to escape
     * @return the normalized, escaped text
     */
    public String quoted(String source) {
        source = formC.normalize(source);
        StringBuilder result = new StringBuilder(source.length());
        for (int i = 0; i < source.length(); ++i) {
            char ch = source.charAt(i);
            switch(ch) {
            case '\'':
                if (!isHTML) {
                    result.append("&apos;");
                } else {
                    result.append(ch);
                }
                break;
            case '\"':
                result.append("&quot;");
                break;
            case '<':
                result.append("&lt;");
                break;
            case '&':
                result.append("&amp;");
                break;
            case '>':
                result.append("&gt;");
                break;
            case '\n': case '\r': case '\t':
                result.append(ch);
                break;
            default:
                if (Character.isHighSurrogate(ch) && i + 1 < source.length()
                        && Character.isLowSurrogate(source.charAt(i + 1))) {
                    // Well-formed pair: keep both halves and skip the second.
                    result.append(ch).append(source.charAt(++i));
                } else if (ch < ' '
                        || ch >= '\u007F' && ch <= '\u009F'
                        || ch >= '\uD800' && ch <= '\uDFFF' // unpaired surrogate
                        || ch >= '\uFFFE') {
                    result.append('\uFFFD');
                } else {
                    result.append(ch);
                }
                break;
            }
        }
        return result.toString();
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@ -1,350 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
 * $Date: 2006/04/05 22:12:44 $
 * $Revision: 1.37 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.Date;
 import com.ibm.text.utility.*;
 /**
  * Command-line dispatcher for the UCD tools. Each argument names an action
  * and arguments are processed left to right. The names "ALL", "CORE" and
  * "EXTRACTED" expand in place to lists of file names, "version" consumes
  * the following argument, and any name without a dedicated branch is handed
  * to <code>CallArgs.call</code> together with {@link #classPrefix}.
  */
 public final class Main implements UCD_Types {
    static final String classPrefix = "com.ibm.text.UCD.";

    /** Names substituted for the "CORE" argument. */
    static final String[] CORE_FILES = {
        "CaseFolding",
        "CompositionExclusions",
        "DerivedCoreProperties",
        "DerivedNormalizationProps",
        "NormalizationTest",
        "PropertyAliases",
        "PropList",
        "Scripts",
        "SpecialCasing",
        "HangulSyllableType", // was listed twice, which ran it twice per CORE
        "DerivedAge",
        "StandardizedVariants",
        //"OtherDerivedProperties",
    };

    /** Names substituted for the "EXTRACTED" argument. */
    static final String[] EXTRACTED_FILES = {
        "DerivedBidiClass",
        "DerivedBinaryProperties",
        "DerivedCombiningClass",
        "DerivedDecompositionType",
        "DerivedEastAsianWidth",
        "DerivedGeneralCategory",
        "DerivedJoiningGroup",
        "DerivedJoiningType",
        "DerivedLineBreak",
        "DerivedNumericType",
        "DerivedNumericValues",
    };

    /** Names substituted for the "ALL" argument; each expands again in turn. */
    static final String[] ALL_FILES = {
        "Core", "Extracted"
    };

    /**
     * Runs the actions named by the arguments, in order. Matching is
     * case-insensitive. An argument starting with '#' ends processing, and
     * an empty argument is skipped. A start line and a done line, each with
     * a timestamp, are always printed, even when an action throws.
     *
     * @param args action names, optionally interleaved with their values
     * @throws IllegalArgumentException if "version" or "TestData" is the
     *         last argument and so has no value
     * @throws Exception whatever an invoked action throws
     */
    public static void main (String[] args) throws Exception {
        System.out.println("*** Start *** " + Default.getDate());
        try {
            for (int i = 0; i < args.length; ++i) {
                String arg = args[i];
                if (arg.length() == 0) continue; // nothing to dispatch on
                if (arg.charAt(0) == '#') return; // skip rest of line
                Utility.fixDot();
                System.out.println();
                System.out.println("** Argument: " + args[i] + " ** " + Default.getDate());

                // Expand string arguments: splice the list in front of the
                // remaining arguments and restart from the new beginning.
                if (arg.equalsIgnoreCase("ALL")) {
                    args = Utility.append(ALL_FILES, Utility.subarray(args, i+1));
                    i = -1;
                    continue;
                }
                if (arg.equalsIgnoreCase("CORE")) {
                    args = Utility.append(CORE_FILES, Utility.subarray(args, i+1));
                    i = -1;
                    continue;
                }
                if (arg.equalsIgnoreCase("EXTRACTED")) {
                    args = Utility.append(EXTRACTED_FILES, Utility.subarray(args, i+1));
                    i = -1;
                    continue;
                }

                // make sure the UCD is set up
                if (arg.equalsIgnoreCase("version")) {
                    Default.setUCD(valueAfter(args, i, arg));
                    ++i; // the value has been consumed
                    continue;
                }

                // Now handle other options
                if (arg.equalsIgnoreCase("verify")) {
                    VerifyUCD.verify();
                    VerifyUCD.checkCanonicalProperties();
                    VerifyUCD.CheckCaseFold();
                    VerifyUCD.checkAgainstUInfo();
                } else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{Default.ucdVersion()});
                else if (arg.equalsIgnoreCase("statistics")) VerifyUCD.statistics();
                else if (arg.equalsIgnoreCase("NFSkippable")) NFSkippable.main(null);
                else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
                else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
                else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
                else if (arg.equalsIgnoreCase("onetime")) VerifyUCD.oneTime();
                else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
                else if (arg.equalsIgnoreCase("definitionTransliterator")) GenerateHanTransliterator.main(0);
                else if (arg.equalsIgnoreCase("romajiTransliterator")) GenerateHanTransliterator.main(1);
                else if (arg.equalsIgnoreCase("pinYinTransliterator")) GenerateHanTransliterator.main(2);
                else if (arg.equalsIgnoreCase("hanproperties")) GenerateHanTransliterator.readUnihan();
                else if (arg.equalsIgnoreCase("fixChineseOverrides")) GenerateHanTransliterator.fixChineseOverrides();
                else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
                else if (arg.equalsIgnoreCase("testenum")) SampleEnum.test();
                else if (arg.equalsIgnoreCase("quicktest")) QuickTest.test();
                else if (arg.equalsIgnoreCase("TernaryStore")) TernaryStore.test();
                else if (arg.equalsIgnoreCase("checkBIDI")) VerifyUCD.checkBIDI();
                // Case-insensitive, so this also serves "BuildNames".
                else if (arg.equalsIgnoreCase("Buildnames")) BuildNames.main(null);
                else if (arg.equalsIgnoreCase("TestNormalization")) TestNormalization.main(null);
                else if (arg.equalsIgnoreCase("binary")) FastBinarySearch.test();
                else if (arg.equalsIgnoreCase("GenerateCaseTest")) GenerateCaseTest.main(null);
                else if (arg.equalsIgnoreCase("checkDecompFolding")) VerifyUCD.checkDecompFolding();
                else if (arg.equalsIgnoreCase("breaktest")) GenerateBreakTest.main(null);
                else if (arg.equalsIgnoreCase("checkcollator")) CheckCollator.main(null);
                else if (arg.equalsIgnoreCase("iana")) IANANames.testSensitivity();
                else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
                else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
                else if (arg.equalsIgnoreCase("checkCase3")) VerifyUCD.checkCase3();
                else if (arg.equalsIgnoreCase("checkCaseLong")) VerifyUCD.checkCase2(true);
                else if (arg.equalsIgnoreCase("checkCaseShort")) VerifyUCD.checkCase2(false);
                else if (arg.equalsIgnoreCase("checkCanonicalProperties")) VerifyUCD.checkCanonicalProperties();
                else if (arg.equalsIgnoreCase("CheckCaseFold")) VerifyUCD.CheckCaseFold();
                else if (arg.equalsIgnoreCase("genIDN")) VerifyUCD.genIDN();
                else if (arg.equalsIgnoreCase("VerifyIDN")) VerifyUCD.VerifyIDN();
                else if (arg.equalsIgnoreCase("NFTest")) VerifyUCD.NFTest();
                else if (arg.equalsIgnoreCase("test1")) VerifyUCD.test1();
                else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null);
                else if (arg.equalsIgnoreCase("TestData")) {
                    TestData.main(new String[]{valueAfter(args, i, arg)});
                    ++i; // the value has been consumed
                }
                else if (arg.equalsIgnoreCase("MakeUnicodeFiles")) MakeUnicodeFiles.main(new String[]{});
                else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
                else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
                else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
                else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
                else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.checkNameList();
                else if (arg.equalsIgnoreCase("Compare14652")) Compare14652.main(null);
                else if (arg.equalsIgnoreCase("StandardizedVariants")) {
                    GenerateStandardizedVariants.generate();
                // OTHER STANDARD PROPERTIES
                } else if (arg.equalsIgnoreCase("CaseFolding")) {
                    GenerateCaseFolding.makeCaseFold(true);
                    GenerateCaseFolding.makeCaseFold(false);
                } else if (arg.equalsIgnoreCase("SpecialCasing")) {
                    GenerateCaseFolding.generateSpecialCasing(true);
                    GenerateCaseFolding.generateSpecialCasing(false);
                } else {
                    // Everything else is delegated, including the remaining
                    // CORE_FILES and EXTRACTED_FILES names. The disabled
                    // GenerateData-based branches that used to sit in this
                    // chain as commented-out code were removed.
                    CallArgs.call(new String[]{arg}, classPrefix);
                }
            }
        } finally {
            System.out.println("*** Done *** " + Default.getDate());
        }
    }

    /**
     * Returns the argument that follows position <code>index</code>.
     *
     * @param args   the full argument list
     * @param index  position of the option that needs a value
     * @param option the option name, used only in the error message
     * @return <code>args[index + 1]</code>
     * @throws IllegalArgumentException if no argument follows the option
     */
    private static String valueAfter(String[] args, int index, String option) {
        if (index + 1 >= args.length) {
            throw new IllegalArgumentException("option \"" + option + "\" requires a value");
        }
        return args[index + 1];
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java
@ -1,506 +0,0 @@
 package com.ibm.text.UCD;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.TransliteratorUtilities;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.UnicodePropertySource;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.Replaceable;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.icu.util.ULocale;
 import com.ibm.text.utility.Utility;
 import com.ibm.text.utility.Utility.Encoding;
 public class MakeNamesChart {
 	/** Code point of the names-list entry whose follow-up rows are still pending; -1 when there is none. */
 	static int lastCodePoint = -1;
 	/** Not read or written anywhere else in this class. */
 	static boolean lastCodePointIsOld = false;
 	/**
 	 * Decomposition type of {@link #lastCodePoint} that has not yet been confirmed by a
 	 * ':' or '#' line of the names list; checkCanonical/checkCompatibility reset it to UCD.NONE,
 	 * and finishItem prints an alert when it is still set.
 	 */
 	static int lastDecompType = UCD.NONE;
 	/** File-name prefix of the per-block chart pages. */
 	static final String chartPrefix = "c_";
 	/** File-name prefix of the per-block names pages. */
 	static final String namePrefix = "n_";
 	/** Code points removed from every block's chart grid; assigned in main(). */
 	static UnicodeSet skipChars;// = new UnicodeSet("[[:gc=cn:]-[:noncharactercodepoint:]]");
 	/** Code points that showChar() wraps in U+200E marks; assigned in main(). */
 	static UnicodeSet rtl;// = new UnicodeSet("[[:bidiclass=r:][:bidiclass=al:]]");
 	/** Code points that showChar() renders as a substitute symbol instead of themselves; assigned in main(). */
 	static UnicodeSet usePicture;// = new UnicodeSet("[[:whitespace:][:defaultignorablecodepoint:]]");
 	/** UCD for version 4.1.0, created in main(); isNew() compares allocation against it. */
 	static UCD ucd41;
 	/** Directory that receives every generated HTML page. */
 	private static final String OUTPUT_DIR = "C:/DATA/GEN/charts/namelist/";
 	/**
 	 * Generates the HTML names-list charts for Unicode 5.0.0.
 	 * <p>NamesList.txt is consumed one block at a time through {@link BlockInfo}; blocks whose
 	 * first line starts with "@@@" are skipped. For every other block a chart page
 	 * (chartPrefix + file name) and a names page (namePrefix + file name) are written, where the
 	 * file name is the second tab-separated field of the block's first line plus ".html".
 	 * Afterwards mainList.html (the index of all blocks) is written and the decomposition
 	 * name statistics gathered by checkCanonical/checkCompatibility are printed.
 	 * <p>All writers and the names-list reader are closed even when processing fails.
 	 *
 	 * @param args ignored
 	 * @throws Exception if a data file cannot be read, a page cannot be written,
 	 *         or a names-list line cannot be processed
 	 */
 	public static void main(String[] args) throws Exception {
 		//ConvertUCD.main(new String[]{"5.0.0"});
 		BlockInfo blockInfo = new BlockInfo("5.0.0", "NamesList.txt");
 		// http://www.unicode.org/~book/incoming/kenfiles/U50M051010.lst
 		List nameList = new ArrayList();
 		BitSet nameListNew = new BitSet();
 		try {
 			Default.setUCD("5.0.0");
 			ucd41 = UCD.make("4.1.0");
 			ToolUnicodePropertySource up = ToolUnicodePropertySource.make("5.0.0");
 			// [[:gc=cn:]-[:noncharactercodepoint:]] -- subtracting gc=cn from itself always gave an empty set
 			skipChars = new UnicodeSet(up.getSet("gc=cn")).removeAll(up.getSet("noncharactercodepoint=true"));
 			// [[:bidiclass=r:][:bidiclass=al:]]
 			rtl = new UnicodeSet(up.getSet("bidiclass=r")).addAll(up.getSet("bidiclass=al"));
 			// [[:whitespace:][:defaultignorablecodepoint:]]
 			usePicture = new UnicodeSet(up.getSet("whitespace=true")).addAll(up.getSet("defaultignorablecodepoint=true"));
 			ArrayList lines = new ArrayList();
 			while (blockInfo.next(lines)) {
 				String firstLine = (String)lines.get(0);
 				if (firstLine.startsWith("@@@")) continue;
 				String[] lineParts = firstLine.split("\t");
 				String fileName = lineParts[1] + ".html";
 				nameList.add(firstLine);
 				System.out.println();
 				System.out.println("file: " + chartPrefix + fileName);
 				writeChartFile(fileName, lineParts, lines);
 				writeNamesFile(fileName, lines, nameList, nameListNew);
 			}
 		} finally {
 			blockInfo.in.close();
 		}
 		writeMainList(nameList, nameListNew);
 		System.out.println("Name differences: Canonical");
 		showNameDifferences(hasNameCan, hasNoNameCan);
 		System.out.println("Name differences: Compatibility");
 		showNameDifferences(hasNameComp, hasNoNameComp);
 		System.out.println("Done");
 	}
 	/** Writes the chart page of one block: a header table followed by a 16-column grid of its code points. */
 	private static void writeChartFile(String fileName, String[] lineParts, List lines) throws IOException {
 		PrintWriter out = BagFormatter.openUTF8Writer(OUTPUT_DIR, chartPrefix + fileName);
 		try {
 			out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'><title>" +
 					TransliteratorUtilities.toHTML.transliterate(getHeading(lineParts[2])) +
 					"</title><link rel='stylesheet' type='text/css' href='namelist.css'>" +
 					"<base target='names'></head><body>");
 			// header
 			out.println("<table class='headerTable'><tr><td class='headerLeft'>" +
 					lineParts[1] +
 					" <a href='help.html'>help</a></td><td class='headerCenter'>" +
 					getHeading(lineParts[2]) +
 					"</td><td class='headerRight'><a href='mainList.html'>index</a> " +
 					lineParts[3] +
 					"</td></tr></table>");
 			if ("Unassigned".equals(lineParts[2])) {
 				System.out.println("debug");
 			}
 			// first pass through and collect all the code points
 			UnicodeSet collectedCodePoints = new UnicodeSet();
 			for (int i = 1; i < lines.size(); ++i) {
 				String line = (String)lines.get(i);
 				int cp1 = line.charAt(0);
 				if (cp1 != '@' && cp1 != '\t') {
 					collectedCodePoints.add(Integer.parseInt(line.split("\t")[0],16));
 				}
 			}
 			collectedCodePoints.removeAll(skipChars);
 			if (collectedCodePoints.size() == 0) {
 				out.println("<p align='center'>No Names List</p>");
 			} else {
 				out.println("<div align='center'><table class='chart'><tr>");
 				int counter = 0;
 				for (UnicodeSetIterator it = new UnicodeSetIterator(collectedCodePoints); it.next();) {
 					if ((counter % 16) == 0 && counter != 0) {
 						out.println("</tr><tr>");
 					}
 					// cells of the first row get their own style class
 					String tdclass = "cell";
 					if (counter < 16) tdclass = "cellw";
 					if (it.codepoint == 0x242) {
 						System.out.println("debug");
 					}
 					if (isNew(it.codepoint)) tdclass += "new";
 					String hexcp = Utility.hex(it.codepoint, 4);
 					String title = "";
 					String name = Default.ucd().getName(it.codepoint);
 					// fixed locale: character names must not be lowercased with locale-specific rules
 					if (name != null) title = " title='" + TransliteratorUtilities.toHTML.transliterate(name.toLowerCase(Locale.ENGLISH)) + "'";
 					out.println("<td class='" + tdclass + "'"
 							+ title
 							+ ">\u00A0"
 							+ showChar(it.codepoint) + "\u00A0<br><tt><a href='" + namePrefix + fileName + "#"+ hexcp + "'>" +
 							hexcp + "</a></tt></td>");
 					counter++;
 				}
 				// only a chart with more than one row has its last row padded to 16 cells
 				int filled = counter & 0xF;
 				if (counter > 16 && filled != 0) {
 					for (; filled < 16; ++filled) out.println("<td class='cell'>\u00A0</td>");
 				}
 				// always close the table, also for charts that fit in a single row
 				out.println("</tr></table></div>");
 			}
 			out.println("</body></html>");
 		} finally {
 			out.close();
 		}
 	}
 	/**
 	 * Writes the names page of one block. Marks the block's slot in nameListNew when any of its
 	 * code points is new; the slot index is the position of the block in nameList.
 	 */
 	private static void writeNamesFile(String fileName, List lines, List nameList, BitSet nameListNew) throws IOException {
 		PrintWriter out = BagFormatter.openUTF8Writer(OUTPUT_DIR, namePrefix + fileName);
 		try {
 			out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>" +
 					"<link rel='stylesheet' type='text/css' href='namelist.css'></head><body>");
 			// now do the characters
 			boolean inTable = false;
 			for (int i = 1; i < lines.size(); ++i) {
 				String line = (String)lines.get(i);
 				try {
 					if (line.startsWith("@")) {
 						// heading or comment line: ends the pending item and any open table
 						finishItem(out);
 						if (inTable) {
 							out.println("</table>");
 							inTable = false;
 						}
 						if (line.startsWith("@+")) {
 							out.println("<p class='comment'>"
 									+ line.substring(2).trim()
 									+ "</p>");
 						} else {
 							out.println("<h2>"
 									+ line.substring(1).trim()
 									+ "</h2>");
 						}
 					} else {
 						if (!inTable) {
 							out.println("<table>");
 							inTable = true;
 						}
 						if (line.startsWith("\t")) {
 							// annotation row belonging to lastCodePoint
 							String body = line.trim();
 							char firstChar = body.charAt(0);
 							switch (firstChar) {
 							case '*': body = "\u2022 " + body.substring(2); break;
 							case ':': body = checkCanonical(lastCodePoint, body); break;
 							case '#': body = checkCompatibility(lastCodePoint, body); break;
 							case 'x': body = getOther(body); break;
 							case '=': break;
 							default: throw new IllegalArgumentException("Huh? " + body);
 							}
 							out.println("<tr><td>\u00A0</td><td>\u00A0</td><td>"
 									+ maybeNameStyle(showTextConvertingHex(body, firstChar != '='), firstChar == '=')
 									+ "</td></tr>");
 						} else {
 							// a new character entry: code point, tab, name
 							finishItem(out);
 							String[] fields = line.split("\t");
 							String x = fields[0];
 							lastCodePoint = Integer.parseInt(x,16);
 							boolean lastCodePointIsNew = isNew(lastCodePoint);
 							if (lastCodePointIsNew) nameListNew.set(nameList.size()-1, true);
 							out.println("<tr><td"
 									+ (lastCodePointIsNew ? " class='new'" : "")
 									+ "><code><a name='" + x + "'>" + x + "</a></code></td><td>\u00A0"
 									+ showChar(lastCodePoint) + "\u00A0</td><td"
 									+ (lastCodePointIsNew ? " class='new'" : "") + ">"
 									+ nameStyle(showTextConvertingHex(fields[1], false)) + "</td></tr>");
 							lastDecompType = Default.ucd().getDecompositionType(lastCodePoint);
 						}
 					}
 				} catch (Exception e) {
 					throw (IllegalArgumentException) new IllegalArgumentException("Error on line: " + line)
 					.initCause(e);
 				}
 			}
 			finishItem(out);
 			// close the table left open by the last entry of the block
 			if (inTable) {
 				out.println("</table>");
 			}
 			out.println("</body></html>");
 		} finally {
 			out.close();
 		}
 	}
 	/** Writes mainList.html, the index table linking to every block's chart page. */
 	private static void writeMainList(List nameList, BitSet nameListNew) throws IOException {
 		PrintWriter out = BagFormatter.openUTF8Writer(OUTPUT_DIR, "mainList.html");
 		try {
 			out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>" +
 					"<title>Main List</title><link rel='stylesheet' type='text/css' href='namelist.css'>" +
 					"<base target='chart'></head><body><table>");
 			for (int i = 0; i < nameList.size(); ++i) {
 				String line = (String) nameList.get(i);
 				String[] lineParts = line.split("\t");
 				String fileName = lineParts[1] + ".html";
 				out.println("<tr><td><code>" + lineParts[1] +
 						"</code></td><td"
 						+ (nameListNew.get(i) ? " class='new'" : "")
 						+ "><a href='" + chartPrefix + fileName + "'>" + getHeading(lineParts[2]) + "</a></td><td><code>" +
 						lineParts[3] +"</code></td></tr>");
 			}
 			out.println("</table></body></html>");
 		} finally {
 			out.close();
 		}
 	}
 	/**
 	 * Tells whether a code point is allocated in the default UCD but was not yet
 	 * allocated in the 4.1.0 UCD held in ucd41.
 	 */
 	private static boolean isNew(int codepoint) {
 		if (!Default.ucd().isAllocated(codepoint)) {
 			return false;
 		}
 		return !ucd41.isAllocated(codepoint);
 	}
 	/**
 	 * Prints every decomposition string that is a key of both maps, together with the
 	 * hex form of the value each map stores for it, followed by the number of such keys.
 	 *
 	 * @param hasName   map from decomposition string to a character string
 	 * @param hasNoName map from decomposition string to a character string
 	 */
 	private static void showNameDifferences(Map hasName, Map hasNoName) {
 		// keys present in both maps, in sorted order
 		Set shared = new TreeSet(hasNoName.keySet());
 		shared.retainAll(hasName.keySet());
 		Iterator keys = shared.iterator();
 		while (keys.hasNext()) {
 			String decomp = (String) keys.next();
 			String withName = (String) hasName.get(decomp);
 			String withoutName = (String) hasNoName.get(decomp);
 			System.out.println();
 			System.out.println("decomp: " + Utility.hex(decomp));
 			System.out.println("Has name in: " + Utility.hex(withName));
 			System.out.println("Has no name in: " + Utility.hex(withoutName));
 		}
 		System.out.println("Count: " + shared.size());
 	}
 	static TestIdentifiers ti;
 	static {
 		try {
 			ti = new TestIdentifiers("L");
 		} catch (IOException e) {
 			// TODO Auto-generated catch block
 			e.printStackTrace();
 		}
 	}
 	/**
 	 * Emits the derived rows for the pending entry (lastCodePoint) and clears it.
 	 * <p>Does nothing when no entry is pending. Otherwise it prints an alert if the entry's
 	 * decomposition type was never matched by a ':' or '#' line, then writes, via showForm,
 	 * one row each for the full upper/title/lower/fold case mappings and the NFD and NFKD
 	 * forms wherever they differ from the forms already shown. When the NFKD form equals the
 	 * character itself, a row is also written for each confusable whose NFKD form differs
 	 * from it.
 	 *
 	 * @param out writer of the names page being generated
 	 */
 	private static void finishItem(PrintWriter out) {
 		if (lastCodePoint < 0) return;
 		if (lastDecompType != UCD.NONE) {
 			System.out.println("Alert: missing decomp for " + Utility.hex(lastCodePoint));
 		}
 		String str = UTF16.valueOf(lastCodePoint);
 		String upper = showForm(out, str, null, null, Default.ucd().getCase(str,UCD.FULL,UCD.UPPER), "\u2191");
 		// the title form is suppressed when it matches the uppercase form
 		showForm(out, str, upper, null, Default.ucd().getCase(str,UCD.FULL,UCD.TITLE), "\u2195");
 		String lower = showForm(out, str, null, null, Default.ucd().getCase(str,UCD.FULL,UCD.LOWER), "\u2193");
 		// the case folding is compared against the lowercase form
 		showForm(out, lower, null, null, Default.ucd().getCase(str,UCD.FULL,UCD.FOLD), "\u2194");
 		String dc = Default.ucd().getDecompositionMapping(lastCodePoint);
 		String nfd = showForm(out, dc, str, null, Default.nfd().normalize(lastCodePoint), "\u21DB");
 		String nfkd = showForm(out, dc, str, nfd, Default.nfkd().normalize(lastCodePoint), "\u21DD");
 		// ti is null when its static initializer failed; skip the confusables instead of
 		// dying with a NullPointerException
 		if (ti != null && nfkd.equals(str)) {
 			Set s = ti.getConfusables(lastCodePoint, "MA");
 			if (s.size() > 1) {
 				sortedSet.clear();
 				for (Iterator it = s.iterator(); it.hasNext();) {
 					sortedSet.add(Default.nfkd().normalize((String)it.next()));
 				}
 				sortedSet.remove(nfkd); // remove me
 				for (Iterator it = sortedSet.iterator(); it.hasNext();) {
 					String other = (String)it.next();
 					if (nfkd.equals(Default.nfkd().normalize(other))) continue;
 					// fixed locale: character names must not be lowercased with locale-specific rules
 					out.println("<tr><td>\u00A0</td><td>\u00A0</td><td class='conf'>\u279F\u00A0"
 							+ showTextConvertingHex(Utility.hex(other, 4, " + "), true)
 							+ " "
 							+ Default.ucd().getName(other, UCD.NORMAL, " + ").toLowerCase(Locale.ENGLISH)
 							+ "</td></tr>");
 				}
 			}
 		}
 		lastCodePoint = -1;
 	}
 	/** Scratch set reused by finishItem to order confusables with an English collator. */
 	static Set sortedSet = new TreeSet(Collator.getInstance(ULocale.ENGLISH));
 	/**
 	 * Writes a table row for a transformed form of a character unless the form equals one of
 	 * the three reference strings. The row shows the symbol, the hex code points of the form
 	 * and, when the form is a single code point, its lowercased name.
 	 *
 	 * @param out         writer of the names page being generated
 	 * @param str         first string the form is compared with; may be null
 	 * @param str2        second string the form is compared with; may be null
 	 * @param str3        third string the form is compared with; may be null
 	 * @param transformed the form to show
 	 * @param symbol      text placed in front of the form
 	 * @return transformed, unchanged
 	 */
 	private static String showForm(PrintWriter out, String str, String str2, String str3, String transformed, String symbol) {
 		if (!transformed.equals(str) && !transformed.equals(str2) && !transformed.equals(str3)) {
 			// fixed locale: character names must not be lowercased with locale-specific rules
 			out.println("<tr><td>\u00A0</td><td>\u00A0</td><td class='c'>" + symbol + "\u00A0"
 				+ showTextConvertingHex(Utility.hex(transformed, 4, " + "), true)
 				+ (UTF16.countCodePoint(transformed) != 1 ? "" :
 					" " + Default.ucd().getName(transformed, UCD.NORMAL, " + ").toLowerCase(Locale.ENGLISH))
 				+ "</td></tr>");
 		}
 		return transformed;
 	}
 	/**
 	 * Strips the last parenthesised suffix from a heading.
 	 *
 	 * @param name heading text
 	 * @return the text before the last occurrence of " (", or name itself when there is none
 	 */
 	static public String getHeading(String name) {
 		int suffixStart = name.lastIndexOf(" (");
 		return suffixStart >= 0 ? name.substring(0, suffixStart) : name;
 	}
 	/**
 	 * Applies nameStyle to a string only when b is set and the string is unchanged by
 	 * English uppercasing; otherwise returns the string as it is.
 	 */
 	private static String maybeNameStyle(String string, boolean b) {
 		if (!b) {
 			return string;
 		}
 		boolean alreadyUpper = string.equals(string.toUpperCase(Locale.ENGLISH));
 		return alreadyUpper ? nameStyle(string) : string;
 	}
 	/**
 	 * Title-cases a string with the full UCD title mapping and wraps it in &lt;i&gt;...&lt;/i&gt;.
 	 * Title-casing also capitalises character entities (an ampersand, letters, a semicolon),
 	 * so every sequence matched by escapeMatch is lowercased again afterwards.
 	 *
 	 * @param string text to style; may contain character entities
 	 * @return the styled markup
 	 */
 	private static String nameStyle(String string) {
 		String result = "<i>" + Default.ucd().getCase(string, UCD.FULL, UCD.TITLE) + "</i>";
 		// if it has any &xxx;, then restore them.
 		int position = 0;
 		while (escapeMatch.reset(result).find(position)) {
 			int start = escapeMatch.start();
 			position = escapeMatch.end();
 			// fixed locale: entity names are ASCII and must not follow the default
 			// locale's casing rules (e.g. 'I' lowercases to a dotless i in Turkish)
 			result = result.substring(0,start)
 			+ result.substring(start, position).toLowerCase(Locale.ENGLISH)
 			+ result.substring(position);
 		}
 		return result;
 	}
 	/** Matches an entity-like sequence whose first letter is uppercase, e.g. the result of title-casing one. */
 	static Matcher escapeMatch = Pattern.compile("\\&[A-Z][a-z]*\\;").matcher("");
 	/**
 	 * HTML-escapes a string and, on request, decorates the code points mentioned in it.
 	 * Every run of 4 to 6 uppercase hex digits whose value does not exceed 0x10FFFF is
 	 * wrapped in &lt;code&gt; and followed by a no-break space and showChar() of that value.
 	 *
 	 * @param body          raw text
 	 * @param addCharToHex  whether hex runs are decorated
 	 * @return the escaped and possibly decorated text
 	 */
 	private static String showTextConvertingHex(String body, boolean addCharToHex) {
 		String text = TransliteratorUtilities.toHTML.transliterate(body);
 		if (!addCharToHex) {
 			return text;
 		}
 		int scanFrom = 0;
 		while (scanFrom < text.length() && findHex.reset(text).find(scanFrom)) {
 			int runStart = findHex.start();
 			int runEnd = findHex.end();
 			// by default continue scanning right after this run
 			scanFrom = runEnd;
 			int runLength = runEnd - runStart;
 			if (runLength < 4 || runLength > 6) continue;
 			int cp = Integer.parseInt(findHex.group(),16);
 			if (cp > 0x10FFFF) continue;
 			String glyph = "\u00A0" + showChar(cp);
 			String head = text.substring(0,runStart)
 				+ "<code>" + text.substring(runStart, runEnd) + "</code>"
 				+ glyph;
 			text = head + text.substring(runEnd);
 			// skip over the markup that was just inserted
 			scanFrom = head.length();
 		}
 		return text;
 	}
 	/** Cross reference with a name: "x (name - HEX)". */
 	static Matcher pointer = Pattern.compile("x \\((.*) - ([0-9A-F]+)\\)").matcher("");
 	/** Cross reference without a name: "x HEX" with 4 to 6 hex digits. */
 	static Matcher pointer2 = Pattern.compile("x ([0-9A-F]{4,6})").matcher("");
 	/** Any run of uppercase hex digits; used by showTextConvertingHex. */
 	static Matcher findHex = Pattern.compile("[0-9A-F]+").matcher("");
 	/**
 	 * Converts a cross-reference line into its display form.
 	 * "x (hyphenation point - 2027)" becomes U+2192, a space, "2027", a space and the name;
 	 * "x 2027" becomes U+2192, a space and "2027". For the first form a mismatch between the
 	 * listed name and the UCD name is reported on standard output.
 	 *
 	 * @param body trimmed cross-reference line
 	 * @return the display form
 	 * @throws IllegalArgumentException if the line has neither format
 	 */
 	private static String getOther(String body) {
 		if (pointer.reset(body).matches()) {
 			int target = Integer.parseInt(pointer.group(2),16);
 			String listedName = pointer.group(1);
 			String ucdName = Default.ucd().getName(target);
 			if (ucdName == null) {
 				ucdName = "<not a character>";
 			}
 			if (!listedName.equalsIgnoreCase(ucdName)) {
 				System.out.println("Mismatch in name for " + body + " in " + Utility.hex(lastCodePoint));
 				System.out.println("\tName is: " + ucdName);
 			}
 			return "\u2192 " + Utility.hex(target,4) + " " + listedName;
 		}
 		if (pointer2.reset(body).matches()) {
 			int target = Integer.parseInt(pointer2.group(1),16);
 			return "\u2192 " + Utility.hex(target,4);
 		}
 		throw new IllegalArgumentException("Bad format: " + body);
 	}
 	/**
 	 * Returns the HTML used to display a code point.
 	 * <ul>
 	 * <li>Members of usePicture are shown as a substitute character inside
 	 *     &lt;span class='inv'&gt;: 0x2400 + cp up to 0x20, 0x2421 for 0x7F, else U+2588.</li>
 	 * <li>Categories Cn, Co and Cs are shown as U+2588.</li>
 	 * <li>Otherwise the HTML-escaped character, prefixed with U+25CC for categories Me and Mn,
 	 *     or enclosed in U+200E marks when the code point is in rtl.</li>
 	 * </ul>
 	 */
 	static String showChar(int cp) {
 		if (usePicture.contains(cp)) {
 			int rep;
 			if (cp <= 0x20) {
 				rep = 0x2400 + cp;
 			} else if (cp == 0x7F) {
 				rep = 0x2421;
 			} else {
 				rep = '\u2588';
 			}
 			return "<span class='inv'>" + (char)rep + "</span>";
 		}
 		int category = Default.ucd().getCategory(cp);
 		boolean noGlyph = category == UCD.Cn || category == UCD.Co || category == UCD.Cs;
 		if (noGlyph) {
 			return "\u2588";
 		}
 		String escaped = TransliteratorUtilities.toHTML.transliterate(UTF16.valueOf(cp));
 		if (category == UCD.Me || category == UCD.Mn) {
 			return "\u25CC" + escaped;
 		}
 		if (rtl.contains(cp)) {
 			return "\u200E" + escaped + "\u200E";
 		}
 		return escaped;
 	}
 	//static final UnicodeSet noname = new UnicodeSet("[[:ascii:][:ideographic:]]");
 	/** Canonical decompositions listed without a name: decomposition string to character string. */
 	static final Map hasNoNameCan = new TreeMap();
 	/** Canonical decompositions listed with a name: decomposition string to character string. */
 	static final Map hasNameCan = new TreeMap();
 	/** Compatibility decompositions listed without a name: decomposition string to character string. */
 	static final Map hasNoNameComp = new TreeMap();
 	/** Compatibility decompositions listed with a name: decomposition string to character string. */
 	static final Map hasNameComp = new TreeMap();
 	/**
 	 * Checks a ':' line of the names list against the UCD decomposition of lastCodePoint.
 	 * Reports on standard output when the pending decomposition type is not canonical or when
 	 * the listed text matches neither the hex form of the mapping nor the hex form followed by
 	 * the lowercased name (the latter only for single-code-point mappings). Matches are
 	 * recorded in hasNoNameCan or hasNameCan, and lastDecompType is reset to UCD.NONE.
 	 *
 	 * @param codePoint code point used in messages and as the recorded map value
 	 * @param body      the line; its first two characters are dropped
 	 * @return U+2261, a space and the remaining text
 	 */
 	private static String checkCanonical(int codePoint, String body) {
 		String listed = body.substring(2);
 		if (lastDecompType != UCD.CANONICAL) {
 			System.out.println("Mismatching Decomposition Type: " + listed + " in " + Utility.hex(codePoint));
 		}
 		String mapping = Default.ucd().getDecompositionMapping(lastCodePoint);
 		String expected = Utility.hex(mapping, 4, " ");
 		String expectedWithName = expected;
 		if (UTF16.countCodePoint(mapping) == 1) {
 			expectedWithName = expected + " " + Default.ucd().getName(mapping).toLowerCase();
 		}
 		if (expected.equalsIgnoreCase(listed)) {
 			hasNoNameCan.put(mapping, UTF16.valueOf(codePoint));
 		} else if (expectedWithName.equalsIgnoreCase(listed)) {
 			hasNameCan.put(mapping, UTF16.valueOf(codePoint));
 		} else {
 			System.out.println("Mismatching Decomposition: " + listed + " in " + Utility.hex(codePoint));
 			System.out.println("\tShould be: " + expected);
 		}
 		lastDecompType = UCD.NONE;
 		return "\u2261 " + listed;
 	}
 	/**
 	 * Checks a '#' line of the names list against the UCD decomposition of lastCodePoint.
 	 * The expected text is the hex form of the mapping, prefixed with the decomposition type
 	 * ID in angle brackets unless the type is UCD.COMPAT_UNSPECIFIED; for single-code-point
 	 * mappings the form followed by the lowercased name is accepted as well. Mismatches are
 	 * reported on standard output, matches are recorded in hasNoNameComp or hasNameComp, and
 	 * lastDecompType is reset to UCD.NONE.
 	 *
 	 * @param codePoint code point used in messages and as the recorded map value
 	 * @param body      the line; its first two characters are dropped
 	 * @return U+2248, a space and the remaining text
 	 */
 	private static String checkCompatibility(int codePoint, String body) {
 		String listed = body.substring(2);
 		if (lastDecompType <= UCD.CANONICAL) {
 			System.out.println("Mismatching Decomposition Type: " + listed + " in " + Utility.hex(codePoint));
 		}
 		String mapping = Default.ucd().getDecompositionMapping(lastCodePoint);
 		String expected = Utility.hex(mapping, 4, " ");
 		if (lastDecompType != UCD.COMPAT_UNSPECIFIED) {
 			String typeId = Default.ucd().getDecompositionTypeID(lastCodePoint);
 			expected = "<" + typeId + "> " + expected;
 		}
 		String expectedWithName = expected;
 		if (UTF16.countCodePoint(mapping) == 1) {
 			expectedWithName = expected + " " + Default.ucd().getName(mapping).toLowerCase();
 		}
 		if (expected.equalsIgnoreCase(listed)) {
 			hasNoNameComp.put(mapping, UTF16.valueOf(codePoint));
 		} else if (expectedWithName.equalsIgnoreCase(listed)) {
 			hasNameComp.put(mapping, UTF16.valueOf(codePoint));
 		} else {
 			System.out.println("Mismatching Decomposition: " + listed + " in " + Utility.hex(codePoint));
 			System.out.println("\tShould be: " + expected);
 		}
 		lastDecompType = UCD.NONE;
 		return "\u2248 " + listed;
 	}
 	/**
 	 * Splits a names-list file into blocks. A line starting with "@@" and a tab ends the
 	 * current block and becomes the first line of the next one.
 	 */
 	static class BlockInfo {
 		/** Reader over the names-list file; closed by the caller. */
 		BufferedReader in;
 		/** Block-header line read ahead during the previous call to next(), or null. */
 		String lastLine;
 		/**
 		 * Opens the given Unicode data file for the given version.
 		 *
 		 * @throws IOException if the file cannot be opened
 		 */
 		BlockInfo (String version, String filename) throws IOException {
 			in = Utility.openUnicodeFile(filename, version, true, Utility.LATIN1_WINDOWS);
 			//in = BagFormatter.openUTF8Reader(dir, filename);
 		}
 		/**
 		 * Replaces the contents of the list with the lines of the next block.
 		 *
 		 * @param inout list that receives the lines; cleared first
 		 * @return false when no line was left to deliver
 		 * @throws IOException if reading fails
 		 */
 		boolean next(List inout) throws IOException {
 			inout.clear();
 			// start with the header that terminated the previous block
 			if (lastLine != null) {
 				inout.add(lastLine);
 				lastLine = null;
 			}
 			for (String line = in.readLine(); line != null; line = in.readLine()) {
 				if (line.startsWith("@@\t")) {
 					// header of the following block: keep it for the next call
 					lastLine = line;
 					break;
 				}
 				inout.add(line);
 			}
 			return !inout.isEmpty();
 		}
 	}
 }
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
@ -1,405 +0,0 @@
 Generate: .*BreakTest.*
 DeltaVersion: 17
 CopyrightYear: 2006
 File: auxiliary/GraphemeBreakProperty
 Property: Grapheme_Cluster_Break
 Format:	skipValue=Other
 File: auxiliary/WordBreakProperty
 Property: Word_Break
 Format:	skipValue=Other
 File: auxiliary/SentenceBreakProperty
 Property: Sentence_Break
 Format:	skipValue=Other
 File: auxiliary/GraphemeBreakTest
 Property: SPECIAL
 File: auxiliary/WordBreakTest
 Property: SPECIAL
 File: auxiliary/LineBreakTest
 Property: SPECIAL
 File: auxiliary/SentenceBreakTest
 Property: SPECIAL
 File:	Blocks
 Property: Block
 # Note:   When comparing block names, casing, whitespace, hyphens,
 #         and underbars are ignored.
 #         For example, "Latin Extended-A" and "latin extended a" are equivalent.
 #         For more information on the comparison of property values, 
 #            see UCD.html.
 Format:	valueList skipUnassigned=No_Block
 File:	CaseFolding
 Property: SPECIAL
 File:	DerivedAge
 Property:	Age
 Format:	nameStyle=none noLabel skipValue=unassigned
 Value:	1.1
 # Assigned as of Unicode 1.1.0 (June, 1993)
 # [excluding removed Hangul Syllables]
 Value:	2.0
 # Newly assigned in Unicode 2.0.0 (July, 1996)
 Value:	2.1
 # Newly assigned in Unicode 2.1.2 (May, 1998)
 Value:	3.0
 # Newly assigned in Unicode 3.0.0 (September, 1999)
 Value:	3.1
 # Newly assigned in Unicode 3.1.0 (March, 2001)
 Value:	3.2
 # Newly assigned in Unicode 3.2.0 (March, 2002)
 Value:	4.0
 # Newly assigned in Unicode 4.0.0 (April, 2003)
 Value:	4.1
 # Newly assigned in Unicode 4.1.0 (March, 2005)
 Value:	5.0
 # Newly assigned in Unicode 5.0.0 (July, 2006)
 File:	extracted/DerivedBidiClass
 Property:	Bidi_Class
 # Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)
 # Unlike other properties, unassigned code points in blocks 
 # reserved for right-to-left scripts are given either types R or AL.
 # The unassigned characters that default to R are:
 #   Hebrew, Cypriot_Syllabary, Kharoshthi, and the ranges \u07C0-\u08FF
 #   \uFB1D-\uFB4F \U00010840-\U000109FF \U00010A60-\U00010FFF
 # The unassigned characters that default to AL are:
 #   Arabic, Syriac, Arabic_Supplement, Thaana, Arabic_Presentation_Forms_A,
 #   Arabic_Presentation_Forms_B, minus the Noncharacter_Code_Points
 # For all other cases:
 Format:	valueStyle=short skipUnassigned=Left_To_Right
 File:	extracted/DerivedBinaryProperties
 Property:	Bidi_Mirrored
 # Bidi_Mirrored (listing UnicodeData.txt, field 9: see UCD.html)
 File:	extracted/DerivedCombiningClass
 Property:	Canonical_Combining_Class
 # Combining Class (listing UnicodeData.txt, field 3: see UCD.html)
 Format: nameStyle=none valueStyle=short skipUnassigned=Not_Reordered
 File:	DerivedCoreProperties
 Property:	Math
 # Derived Property: Math
 #  Generated from: Sm + Other_Math
 Property:	Alphabetic
 # Derived Property: Alphabetic
 #  Generated from: Lu+Ll+Lt+Lm+Lo+Nl + Other_Alphabetic
 Property:	Lowercase
 # Derived Property: Lowercase
 #  Generated from: Ll + Other_Lowercase
 Property:	Uppercase
 # Derived Property: Uppercase
 #  Generated from: Lu + Other_Uppercase
 Property:	ID_Start
 # Derived Property: ID_Start
 #  Characters that can start an identifier.
 #  Generated from Lu+Ll+Lt+Lm+Lo+Nl+Other_ID_Start
 #  NOTE: See UAX #31 for more information
 Property:	ID_Continue
 # Derived Property: ID_Continue
 #  Characters that can continue an identifier.
 #  Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue
 #  NOTE: See UAX #31 for more information
 Property:	XID_Start
 # Derived Property: XID_Start
 #  ID_Start modified for closure under NFKx
 #  Modified as described in UAX #15
 #  NOTE: Does NOT remove the non-NFKx characters.
 #        Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
 #  NOTE: See UAX #31 for more information
 Property:	XID_Continue
 # Derived Property: XID_Continue
 #  Mod_ID_Continue modified for closure under NFKx
 #  Modified as described in UAX #15
 #  NOTE: Cf characters should be filtered out.
 #  NOTE: Does NOT remove the non-NFKx characters.
 #        Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
 #  NOTE: See UAX #31 for more information
 Property:	Default_Ignorable_Code_Point
 # Derived Property: Default_Ignorable_Code_Point
 #  Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs + Noncharacters
 #  - White_Space - FFF9..FFFB (Annotation Characters)
 Property:	Grapheme_Extend
 # Derived Property: Grapheme_Extend
 #  Generated from: Me + Mn + Other_Grapheme_Extend
 #  Note: depending on an application's interpretation of Co (private use),
 #  they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither.
 Property:	Grapheme_Base
 # Derived Property: Grapheme_Base
 #  Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend
 #  Note: depending on an application's interpretation of Co (private use),
 #  they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither.
 Property:	Grapheme_Link
 # Derived Property: Grapheme_Link (deprecated)
 #  Generated from: Canonical_Combining_Class=Virama
 #  Use Canonical_Combining_Class=Virama directly instead
 File:	extracted/DerivedDecompositionType
 Property:	Decomposition_Type
 Format:	skipValue=None
 # Decomposition_Type (from UnicodeData.txt, field 5: see UCD.html)
 File:	extracted/DerivedEastAsianWidth
 Property:	East_Asian_Width
 Format:	valueStyle=short skipUnassigned=Neutral
 # East_Asian_Width (listing EastAsianWidth.txt, field 1)
 File:	extracted/DerivedGeneralCategory
 Property:	General_Category
 Format:	valueStyle=short noLabel
 File:	extracted/DerivedJoiningGroup
 Property:	Joining_Group
 # Joining Group (listing ArabicShaping.txt, field 3)
 Format: skipValue=No_Joining_Group
 File:	extracted/DerivedJoiningType
 Property:	Joining_Type
 #	Type T is derived, as described in ArabicShaping.txt
 Format:	valueStyle=short skipValue=Non_Joining
 File:	extracted/DerivedLineBreak
 Property:	Line_Break
 Format:	valueStyle=short skipUnassigned=Unknown
 File:	DerivedNormalizationProps
 Property:	FC_NFKC_Closure
 # Derived Property: FC_NFKC_Closure
 #  Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b));
 #  Then if (c != b) add the mapping from a to c to the set of
 #  mappings that constitute the FC_NFKC_Closure list
 #  Uses the full case folding from CaseFolding.txt, without the T option.
 Format:	nameStyle=short
 Property:	Full_Composition_Exclusion
 # Derived Property: Full_Composition_Exclusion
 #  Generated from: Composition Exclusions + Singletons + Non-Starter Decompositions
 Property:	NFD_QuickCheck
 # Derived Property: NFD_QuickCheck
 #  Generated from computing decomposibles
 Format: nameStyle=short valueStyle=short skipValue=Yes
 Property:	NFC_QuickCheck
 # Derived Property: NFC_QuickCheck
 #  Generated from computing decomposibles (and characters that may compose with previous ones)
 Format: nameStyle=short valueStyle=short skipValue=Yes
 Property:	NFKD_QuickCheck
 # Derived Property: NFKD_QuickCheck
 #  Generated from computing decomposibles
 Format: nameStyle=short valueStyle=short skipValue=Yes
 Property:	NFKC_QuickCheck
 # Derived Property: NFKC_QuickCheck
 #  Generated from computing decomposibles (and characters that may compose with previous ones)
 Format: nameStyle=short valueStyle=short skipValue=Yes
 Property:	Expands_On_NFD
 # Derived Property: Expands_On_NFD
 #   Generated according to UAX #15.
 #   Characters whose normalized length is not one.
 #   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
 #            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
 Property:	Expands_On_NFC
 # Derived Property: Expands_On_NFC
 #   Generated according to UAX #15.
 #   Characters whose normalized length is not one.
 #   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
 #            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
 Property:	Expands_On_NFKD
 # Derived Property: Expands_On_NFKD
 #   Generated according to UAX #15.
 #   Characters whose normalized length is not one.
 #   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
 #            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
 Property:	Expands_On_NFKC
 # Derived Property: Expands_On_NFKC
 #   Generated according to UAX #15.
 #   Characters whose normalized length is not one.
 #   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
 #            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
 File:	extracted/DerivedNumericType
 Property:	Numeric_Type
 # Numeric Type (from UnicodeData.txt, field 6/7/8 plus Unihan.txt: see UCD.html)
 Format: skipValue=None
 File:	extracted/DerivedNumericValues
 Property:	Numeric_Value
 # Numeric Values (from UnicodeData.txt, field 6/7/8)
 # WARNING: Certain values, such as 0.16666667, are repeating fractions.
 # Although they are only printed with a limited number of decimal places
 # in this file, they should be expressed to the limits of the precision
 # available when used.
 Format: sortNumeric
 File:	HangulSyllableType
 Property:	Hangul_Syllable_Type
 Format:	valueStyle=short skipValue=Not_Applicable
 File:	NormalizationTest
 Property: SPECIAL
 File:	PropList
 Property:	White_Space
 Property:	Bidi_Control
 Property:	Join_Control
 Property:	Dash
 Property:	Hyphen
 Property:	Quotation_Mark
 Property:	Terminal_Punctuation
 Property:	Other_Math
 Property:	Hex_Digit
 Property:	ASCII_Hex_Digit
 Property:	Other_Alphabetic
 Property:	Ideographic
 Property:	Diacritic
 Property:	Extender
 Property:	Other_Lowercase
 Property:	Other_Uppercase
 Property:	Noncharacter_Code_Point
 Property:	Other_Grapheme_Extend
 Property:	IDS_Binary_Operator
 Property:	IDS_Trinary_Operator
 Property:	Radical
 Property:	Unified_Ideograph
 Property:	Other_Default_Ignorable_Code_Point
 Property:	Deprecated
 Property:	Soft_Dotted
 Property:	Logical_Order_Exception
 Property:	Other_ID_Start
 Property:	Other_ID_Continue
 Property:	STerm
 Property:	Variation_Selector
 Property:	Pattern_White_Space
 Property:	Pattern_Syntax
 File:	PropertyAliases
 Property: SPECIAL
 File:	PropertyValueAliases
 Property: SPECIAL
 File:	Scripts
 Property:	Script
 Format:	nameStyle=none skipValue=Unknown
 File:	SpecialCasing
 Property: SPECIAL
 File:	StandardizedVariants
 Property: SPECIAL
 File:	NamedSequences
 Property: SPECIAL
 HackName:	noBreak
 HackName:	Arabic_Presentation_Forms-A
 HackName:	Arabic_Presentation_Forms-B
 HackName:	CJK_Symbols_and_Punctuation
 HackName:	Combining_Diacritical_Marks_for_Symbols
 HackName:	Enclosed_CJK_Letters_and_Months
 HackName:	Greek_and_Coptic
 HackName:	Halfwidth_and_Fullwidth_Forms
 HackName:	Latin-1_Supplement
 HackName:	Latin_Extended-A
 HackName:	Latin_Extended-B
 HackName:	Miscellaneous_Mathematical_Symbols-A
 HackName:	Miscellaneous_Mathematical_Symbols-B
 HackName:	Miscellaneous_Symbols_and_Arrows
 HackName:	Superscripts_and_Subscripts
 HackName:	Supplemental_Arrows-A
 HackName:	Supplemental_Arrows-B
 HackName:	Supplementary_Private_Use_Area-A
 HackName:	Supplementary_Private_Use_Area-B
 HackName:	Canadian-Aboriginal
 #HackName:	Old-Italic
 FinalComments
 Note that PropertyAliases sorts by the long name, while PropertyValueAliases
 sorts by the short name
 ArabicShaping
 BidiMirroring
 CompositionExclusions
 EastAsianWidth
 LineBreak
 StandardizedVariants
 UnicodeData
--- a/tools/unicodetools/com/ibm/text/UCD/MyFloatLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyFloatLister.java
@ -1,50 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyFloatLister.java,v $
 * $Date: 2004/03/11 19:03:17 $
 * $Revision: 1.6 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.*;
 import java.text.NumberFormat;
 import java.util.Locale;
 /**
  * Property lister that selects code points by numeric value: a code point is
  * included when its numeric value equals the value given at construction.
  */
 class MyFloatLister extends PropertyLister {
    /** Numeric value a code point must have to be included. */
    private double propMask;

    /** US-locale formatter used by {@link #valueName}; configured in the constructor. */
    NumberFormat nf = NumberFormat.getNumberInstance(Locale.US);

    /**
     * @param ucd    character database queried for numeric values and categories
     * @param f      numeric value to match
     * @param output writer stored in the inherited output field
     */
    public MyFloatLister(UCD ucd, double f, PrintWriter output) {
        // No digit grouping, between 1 and 8 fraction digits.
        nf.setGroupingUsed(false);
        nf.setMaximumFractionDigits(8);
        nf.setMinimumFractionDigits(1);

        this.ucdData = ucd;
        this.output = output;
        this.propMask = f;
    }

    /** Returns the formatted numeric value of {@code cp}. */
    public String valueName(int cp) {
        double numericValue = ucdData.getNumericValue(cp);
        return nf.format(numericValue);
    }

    /** Returns the numeric type ID of {@code cp}. */
    public String optionalName(int cp) {
        return ucdData.getNumericTypeID(cp);
    }

    /**
     * Classifies {@code cp}: CONTINUE for category Cn, otherwise INCLUDE when its
     * numeric value equals the target value and EXCLUDE when it does not.
     */
    public byte status(int cp) {
        // The "unrepresented code point" check is switched off; when it was
        // reachable both of its outcomes produced CONTINUE.
        final boolean checkRepresented = false;
        if (checkRepresented && !ucdData.isRepresented(cp)) {
            return PropertyLister.CONTINUE;
        }
        if (ucdData.getCategory(cp) == Cn) {
            return PropertyLister.CONTINUE;
        }
        if (ucdData.getNumericValue(cp) == propMask) {
            return INCLUDE;
        }
        return EXCLUDE;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
@ -1,123 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
 * $Date: 2004/02/18 03:08:59 $
 * $Revision: 1.12 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.*;
 import com.ibm.text.utility.*;
 /**
  * Property lister driven by a single property selector ({@code propMask}),
  * delegating value lookups to the UCDProperty built from that selector.
  */
 final class MyPropertyLister extends PropertyLister {
    /** Selects CONTINUE instead of EXCLUDE for default-valued unassigned code points in status(). */
    static final boolean BRIDGE = false;

    /** Property selector; its 0xFF00 bits are compared against group constants such as COMBINING_CLASS. */
    private int propMask;
    /** Cached result of up.isDefaultValue(). */
    private boolean isDefaultValue = false;
    /** Property object created from propMask. */
    private UCDProperty up;

    /**
     * @param ucd      character database used for category lookups
     * @param propMask property selector
     * @param output   writer stored in the inherited output field
     */
    public MyPropertyLister(UCD ucd, int propMask, PrintWriter output) {
        this.ucdData = ucd;
        this.output = output;
        this.propMask = propMask;
        this.up = UnifiedBinaryProperty.make(propMask, ucd);
        this.isDefaultValue = up.isDefaultValue();
        if (propMask < COMBINING_CLASS) {
            usePropertyComment = false; // skip gen cat
        }
    }

    /**
     * Returns a "# name" header for combining-class selectors and an empty
     * string for every other selector.
     */
    public String headerString() {
        if ((propMask & 0xFF00) != COMBINING_CLASS) {
            // Binary properties, joining groups and all remaining groups have no header.
            /*
            String shortID = up.getName(SHORT);
            String longID = up.getName(LONG);
            return "# ???? " + shortID + (shortID.equals(longID) ? "" : "\t(" + longID + ")");
            */
            return "";
        }
        String label = UCD.getCombiningClassID_fromIndex((short)(propMask & 0xFF), LONG);
        // IDs that start with a digit (or anything sorting at or below '9') get a generic label.
        if (label.charAt(0) <= '9') {
            label = "Other Combining Class";
        }
        return "# " + label;
    }

    /** Returns the property name for binary properties, otherwise the property value of {@code cp}. */
    public String valueName(int cp) {
        return up.getValueType() == BINARY_PROP ? up.getName() : up.getValue(cp);
    }

    /** Returns up.getValue(NORMAL). */
    public String missingValueName() {
        String missing = up.getValue(NORMAL);
        return missing;
    }

    /**
     * Returns the comment column for {@code cp}: empty below COMBINING_CLASS,
     * "L&" for Lu/Ll/Lt, otherwise the category ID.
     */
    public String optionalComment(int cp) {
        if (propMask < COMBINING_CLASS) {
            return ""; // skip gen cat
        }
        int cat = ucdData.getCategory(cp);
        boolean casedLetter = cat == Lt || cat == Ll || cat == Lu;
        return casedLetter ? "L&" : ucdData.getCategoryID(cp);
    }

    /*
    public String optionalName(int cp) {
        if ((propMask & 0xFF00) == DECOMPOSITION_TYPE) {
            return Utility.hex(ucdData.getDecompositionMapping(cp));
        } else {
            return "";
        }
    }
    */

    /**
     * Classifies {@code cp}. Unassigned (Cn) code points of a default-valued
     * property are EXCLUDEd (or CONTINUEd when BRIDGE is set), except for
     * Noncharacter_Code_Point, Other_Default_Ignorable_Code_Point and the Cn
     * category itself. Everything else is INCLUDEd exactly when the property
     * has a value for the code point.
     */
    public byte status(int cp) {
        final byte category = ucdData.getCategory(cp);
        final boolean exemptFromCnSkip =
            propMask == (BINARY_PROPERTIES | Noncharacter_Code_Point)
            || propMask == (BINARY_PROPERTIES | Other_Default_Ignorable_Code_Point)
            || propMask == (CATEGORY | Cn);
        if (isDefaultValue && category == Cn && !exemptFromCnSkip) {
            return BRIDGE ? CONTINUE : EXCLUDE;
        }
        /* Disabled script override for the mathematical alphanumerics:
        if (cp >= 0x1D400 && cp <= 0x1D7C9 && cat != Cn) {
            if (propMask == (SCRIPT | LATIN_SCRIPT)) inSet = cp <= 0x1D6A3;
            else if (propMask == (SCRIPT | GREEK_SCRIPT)) inSet = cp > 0x1D6A3;
        }
        HACK
        1D400;MATHEMATICAL BOLD CAPITAL A;Lu;0;L;<font> 0041;;;;N;;;;;
        1D6A3;MATHEMATICAL MONOSPACE SMALL Z;Ll;0;L;<font> 007A;;;;N;;;;;
        1D6A8;MATHEMATICAL BOLD CAPITAL ALPHA;Lu;0;L;<font> 0391;;;;N;;;;;
        1D7C9;MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL;Ll;0;L;<font> 03D6;;;;N;;;;;
        */
        return up.hasValue(cp) ? INCLUDE : EXCLUDE;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/NFCSkippable.java
+++ b/tools/unicodetools/com/ibm/text/UCD/NFCSkippable.java
@ -1,20 +0,0 @@
 package com.ibm.text.UCD;
 /**
  * Placeholder for a "skippable under normalization" test. The intended
  * definition is every character that is
  * <ol>
  * <li>not decomposed by the normalization form, and</li>
  * <li>of combining class 0,</li>
  * </ol>
  * and, for the composing forms (NFC or NFKC), additionally
  * <ol start="3">
  * <li>can never compose with a previous character,</li>
  * <li>can never compose with a following character,</li>
  * <li>can never change if another character is added. Example: a-breve might
  *     satisfy the first four, but if you add an ogonek it changes to
  *     a-ogonek + breve.</li>
  * </ol>
  * None of this is implemented here.
  */
 public class NFCSkippable {

    /**
     * Stub: reports every code point as not skippable.
     *
     * @param cp code point to test (ignored)
     * @return always {@code false}
     */
    public boolean is(int cp) {
        final boolean skippable = false;
        return skippable;
    }

    /** Entry point; intentionally does nothing. */
    public static void main (String[] args) {
        // nothing to run
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java
+++ b/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java
@ -1,301 +0,0 @@
 package com.ibm.text.UCD;
 import com.ibm.icu.impl.CollectionUtilities;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.ULocale;
 import java.util.BitSet;
 import com.ibm.text.utility.*;
 import java.io.PrintWriter;
 public final class NFSkippable extends UCDProperty {
    // When true, hasValue() records the reason for its answer in 'cause'.
    static final boolean DEBUG = false;
    // Normalizer for the form this property describes (set in the constructor).
    private Normalizer nf;
    // NFD normalizer, used by hasValue() as a shortcut test.
    private Normalizer nfd;
    // Character database supplied to the constructor.
    private UCD ucd;
    // True when the form is NFC or NFKC.
    private boolean composes;
    // Code points for which nf.isTrailing() is true, minus non-lead jamo;
    // only the first realTrailerCount entries are valid.
    private int[] realTrailers = new int[100];
    private int realTrailerCount = 0;
    /**
     * Builds the "skippable" property for one normalization form.
     * For composing forms (NFC, NFKC) every code point is scanned once to
     * collect the trailing characters that hasValue() later probes with.
     *
     * @param normalizerMode normalization form constant passed to Normalizer
     * @param inputUCD       character database to query
     */
    public NFSkippable(byte normalizerMode, UCD inputUCD) {
        isStandard = false;
        this.ucd = inputUCD;
        nf = new Normalizer(normalizerMode, ucd.getVersion());
        name = nf.getName() + "_Skippable";
        shortName = nf.getName() + "_Skip";
        header = "# Derived Property: " + name
            + "\r\n#   Generated according to UAX #15."
            + "\r\n#   Characters that don't interact with any others in this normalization form."
            + "\r\n#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
            + "\r\n#            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!";
        nfd = new Normalizer(Normalizer.NFD, ucd.getVersion());
        composes = normalizerMode == Normalizer.NFC || normalizerMode == Normalizer.NFKC;
        // preprocess to find possible trailers
        if (composes) {
            for (int cp2 = 0; cp2 <= 0x10FFFF; ++cp2) {
                if (!nf.isTrailing(cp2)) continue;
                // Non-lead jamo are left out; hasValue() handles Hangul separately.
                if (ucd.isNonLeadJamo(cp2)) continue;
                // Grow the buffer instead of overflowing it if a Unicode
                // version ever has more trailers than the initial capacity.
                if (realTrailerCount == realTrailers.length) {
                    int[] grown = new int[Math.max(16, realTrailers.length * 2)];
                    System.arraycopy(realTrailers, 0, grown, 0, realTrailerCount);
                    realTrailers = grown;
                }
                realTrailers[realTrailerCount++] = cp2;
            }
        }
        Utility.fixDot();
        //System.out.println("trailer count: " + realTrailerCount);
    }
    /**
     * Reason recorded by the most recent hasValue() call. It is only written
     * when DEBUG is true; otherwise it stays empty.
     */
    String cause = "";
    /** A skippable character is<br>
    * a) unassigned, or ALL of the following:<br>
    * b) of combining class 0.<br>
    * c) not decomposed by this normalization form.<br>
    * AND if NFC or NFKC, <br>
    * d) can never compose with a previous character.<br>
    * e) can never compose with a following character.<br>
    * f) can never change if another character is added.
    *    Example: a-breve might satisfy all but f, but if you
    *    add an ogonek it changes to a-ogonek + breve
    *
    * @param cp code point to test
    * @return true if cp is skippable for this property's normalization form
    */
    public boolean hasValue(int cp) {
        // quick check on some special classes
        // Each 'cause' is set BEFORE the test it describes, so it names the
        // test that produced the return value.
        if (DEBUG) cause = "\t\tunassigned";
        if (!ucd.isAssigned(cp)) return true;
        if (DEBUG) cause = "\t\tnf differs";
        if (!nf.isNormalized(cp)) return false;
        if (DEBUG) cause = "\t\tnon-zero cc";
        if (ucd.getCombiningClass(cp) != 0) return false;
        if (DEBUG) cause = "";
        // Non-composing forms (NFD, NFKD) need no further checks.
        if (!composes) return true;
        // now special checks for composing normalizers
        if (DEBUG) cause = "\t\tleading";
        if (nf.isLeading(cp)) return false;
        if (DEBUG) cause = "\t\ttrailing";
        if (nf.isTrailing(cp)) return false;
        // OPTIMIZATION -- careful
        // If there is no NFD decomposition, then this character's accents can't be
        // "displaced", so we don't have to test further
        if (DEBUG) cause = "\t\tno decomp";
        if (nfd.isNormalized(cp)) return true;
        // OPTIMIZATION -- careful
        // Hangul syllables are skippable IFF they are NOT isLeadingJamoComposition
        if (ucd.isHangulSyllable(cp)) return !ucd.isLeadingJamoComposition(cp);
        // We now see if adding another character causes a problem.
        // brute force for now!!
        // We do skip the trailing Jamo, since those never displace!
        StringBuffer base = new StringBuffer(UTF16.valueOf(cp));
        int baseLen = base.length();
        // Append each collected trailer in turn; if normalizing the pair
        // changes it, cp interacts with that trailer and is not skippable.
        for (int i = 0; i < realTrailerCount; ++i) {
            base.setLength(baseLen); // shorten if needed
            base.append(UTF16.valueOf(realTrailers[i]));
            String probe = base.toString();
            String result = nf.normalize(probe);
            if (!result.equals(probe)) {
                if (DEBUG) cause = "\t\tinteracts with " + ucd.getCodeAndName(realTrailers[i]);
                return false;
            }
        }
        // passed the sieve, so we are ok
        if (DEBUG) cause = "";
        return true;
    }
    // both the following should go into UTF16
    /**
     * Replaces every occurrence of one code point with another.
     * When both code points are in 0..0xFFFF the char-based String.replace is
     * used; otherwise both are converted to strings and the string overload
     * does the work.
     *
     * @param source      text to search
     * @param toReplace   code point to look for
     * @param replacement code point to substitute
     * @return the text with replacements applied
     */
    public static String replace(String source, int toReplace, int replacement) {
        boolean needsStringForm = toReplace < 0 || toReplace > 0xFFFF
            || replacement < 0 || replacement > 0xFFFF;
        if (needsStringForm) {
            return replace(source, UTF16.valueOf(toReplace), UTF16.valueOf(replacement));
        }
        char from = (char)toReplace;
        char to = (char)replacement;
        return source.replace(from, to);
    }
    /**
     * Replaces every non-overlapping occurrence of {@code toReplace} in
     * {@code source} with {@code replacement}, scanning left to right.
     *
     * @param source      text to search
     * @param toReplace   substring to look for; when empty, nothing is replaced
     * @param replacement text substituted for each occurrence
     * @return the resulting text, or {@code source} itself when there is
     *         nothing to replace
     */
    public static String replace(String source, String toReplace, String replacement) {
        // An empty pattern matches at every position without advancing, which
        // previously looped until memory ran out.
        if (toReplace.length() == 0) return source;
        int newPos = source.indexOf(toReplace);
        if (newPos < 0) return source; // no change necessary
        StringBuffer result = new StringBuffer(source.length());
        int pos = 0;
        do {
            result.append(source.substring(pos, newPos));
            result.append(replacement);
            pos = newPos + toReplace.length();
            newPos = source.indexOf(toReplace, pos);
        } while (newPos >= 0);
        result.append(source.substring(pos));
        return result.toString();
    }
    /**
     * Writes {@code s} as a Java string-concatenation expression, one quoted
     * piece per line: the first line is prefixed with {@code "\t  \""}, later
     * lines with {@code "\t+ \""}, and the last line is followed by
     * {@code term}. Pieces are at most 64 characters long and are shortened
     * so that a split never lands just after a backslash or inside the five
     * characters following one (which would cut a backslash-u escape in two).
     *
     * @param pw   destination writer
     * @param s    text to emit; may be empty
     * @param term text printed after the closing quote of the final piece
     */
    static void writeStringInPieces(PrintWriter pw, String s, String term) {
        final int lineLen = 64;
        final int length = s.length();
        int start = 0;
        while (true) {
            pw.print(start == 0 ? "\t  \"" : "\t+ \"");
            int end = Math.min(length, start + lineLen);
            // Only adjust the cut when text remains after it. Adjusting the
            // final piece was never needed and could either throw (no
            // backslash in a short string gave end = -1) or stall forever.
            if (end < length) {
                final int hardEnd = end;
                // if we have a slash in the last 5 characters, backup
                int lastSlash = s.lastIndexOf('\\', end);
                if (lastSlash >= end - 5) end = lastSlash;
                // backup if we broke on a \
                while (end > start && s.charAt(end - 1) == '\\') --end;
                // A piece made up only of backslashes would back up to nothing;
                // fall back to the plain cut so the loop always advances.
                if (end <= start) end = hardEnd;
            }
            pw.print(s.substring(start, end));
            if (end == length) {
                pw.println('"' + term);
                return;
            }
            pw.println('"');
            start = end;
        }
    }
    /**
     * Manual smoke test: prints a sample UnicodeSet pattern to System.out via
     * writeStringInPieces, first as-is and then with every backslash doubled.
     */
    static void testWriteStringInPieces() {
        final String sample =
            "[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
            + "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD\\u00F"
            + "F-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137\\u0139-"
            + "\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165\\u0168-\\u017"
            + "E\\u01A0-\\u01A1\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u"
            + "01E6-\\u01F0\\u01F4-\\u01F5\\u01F8-\\u021B\\u021E-\\u021F\\u0226";
        PrintWriter console = new PrintWriter(System.out);
        writeStringInPieces(console, sample, "");
        String doubled = replace(sample, "\\", "\\\\");
        writeStringInPieces(console, doubled, "");
        console.flush();
    }
    static int limit = 0x10FFFF; // full version = 10ffff, for testing may use smaller
    /**
     * Generates NFSafeSets.txt: one UNSAFE[...] set per mode from
     * NFD_UnsafeStart through NFKC_UnsafeStart, then one SKIPPABLE[...] set
     * per normalization form from NFD through NFKC.
     *
     * @param args ignored
     * @throws java.io.IOException if the output file cannot be opened
     */
    public static void main (String[] args) throws java.io.IOException {
        PrintWriter out = Utility.openPrintWriter("NFSafeSets.txt", Utility.UTF8_WINDOWS);
        // Close the file even when generation fails part-way through.
        try {
            out.println(Utility.BOM);
            out.println("NFSafeSets");
            out.println("Version: " + Default.ucd().getVersion());
            out.println("Date: " + Default.getDate());
            out.println();
            for (int mode = NFD_UnsafeStart; mode <= NFKC_UnsafeStart; ++mode) {
                UCDProperty up = DerivedProperty.make(mode, Default.ucd());
                // The offset from NFD_UnsafeStart is used as the normalization form index.
                generateSet(out, "UNSAFE[" + Normalizer.getName((byte)(mode-NFD_UnsafeStart)) + "]", up);
            }
            for (byte mode = NFD; mode <= NFKC; ++mode) {
                NFSkippable skipper = new NFSkippable(mode, Default.ucd());
                generateSet(out, "SKIPPABLE[" + Normalizer.getName(mode) + "]", skipper);
            }
            System.out.println("Done");
        } finally {
            out.close();
        }
    }
    static Collator UCA = Collator.getInstance(ULocale.ROOT);
    /**
     * Collects every code point in 0..limit for which {@code up} has a value
     * and writes the set twice: as Java source
     * ({@code label = new UnicodeSet("...", false);}) with backslash-U and
     * backslash-u escapes doubled, and inside a comment in unescaped pattern
     * form.
     *
     * @param out   destination writer (flushed before returning)
     * @param label left-hand side of the generated assignment
     * @param up    property deciding set membership
     */
    static void generateSet(PrintWriter out, String label, UCDProperty up) {
        System.out.println("Generating: " + up.getName(NORMAL));

        UnicodeSet members = new UnicodeSet();
        int cp = 0;
        while (cp <= limit) {
            Utility.dot(cp);
            if (up.hasValue(cp)) {
                members.add(cp);
            }
            ++cp;
        }
        Utility.fixDot();

        // Double the backslash of each escape so the pattern survives being
        // embedded in a Java string literal.
        String escaped = replace(members.toPattern(true), "\\U", "\\\\U");
        escaped = replace(escaped, "\\u", "\\\\u");
        out.println(label + " = new UnicodeSet(");
        writeStringInPieces(out, escaped, ", false);");

        // The collated pretty-print alternative is switched off.
        final boolean prettyPrint = false;
        String readable = prettyPrint
            ? CollectionUtilities.prettyPrint(members, true, null, null, UCA, UCA)
            : members.toPattern(false);
        out.println("/*Unicode: ");
        writeStringInPieces(out, readable, "*/");
        out.println();
        out.flush();
        System.out.println("Done");
    }
            /*
       // DerivedProperty dp = new DerivedProperty(UCD.make(version));
            System.out.println(skipper.getName(NORMAL));
            UnicodeSet result = new UnicodeSet();
            for (int cp = 0; cp <= limit; ++cp) {
                Utility.dot(cp);
                if (skipper.hasProperty(cp)) result.add(cp);
            }
            Utility.fixDot();
            String rSet = result.toPattern(true);
            rSet = replace(rSet, "\\U", "\\\\U");
            out.println("\tSKIPPABLE[" + skipper.getName(NORMAL)
                + "] = new UnicodeSet(");
            writeStringInPieces(out, rSet, ", false);");
            out.println();
            rSet = result.toPattern(false);
            out.println("/*Unicode: ");
            */
            //writeStringInPieces(out, rSet, "*/");
            /*out.println();
            out.flush();
        if (false) {
            NFSkippable skipper = new NFSkippable(Normalizer.NFC,"");
            NFSkippable skipper2 = new NFSkippable(Normalizer.NFKC,"");
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                if (cp > 0xFF) {
                    if (!skipper.ucd.isAssigned(cp)) continue;
                    byte cat = skipper.ucd.getCategory(cp);
                    if (cat == PRIVATE_USE || cat == SURROGATE) continue;
                    if (skipper.ucd.getCombiningClass(cp) != 0) continue;
                    if (!skipper.nf.isNormalized(cp)) continue;
                    if ((cp < 0xAC00 || cp > 0xAE00)
                        && cp != skipper.ucd.mapToRepresentative(cp, false)) continue;
                }
                if (skipper2.hasProperty(cp) == skipper.hasProperty(cp)) continue;
                String status = (skipper.hasProperty(cp) ? "  SKIPc " : "NOSKIPc ")
                    + (skipper2.hasProperty(cp) ? "  SKIPkc " : "NOSKIPkc ");
                System.out.println(status
                    + skipper.ucd.getCodeAndName(cp)
                    + skipper.cause);
            }
        }
        */
 }
--- a/tools/unicodetools/com/ibm/text/UCD/NamedSequences-Template.html
+++ b/tools/unicodetools/com/ibm/text/UCD/NamedSequences-Template.html
@ -1,153 +0,0 @@
 <!doctype HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
 <meta name="GENERATOR" content="Microsoft FrontPage 5.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <meta name="keywords" content="unicode, variant glyphs">
 <meta name="description" content="Describes and displays standardized variant glyphs">
 <title>Named Sequences</title>
 <link rel="stylesheet" type="text/css" href="http://www.unicode.org/reports/reports.css">
 <style>
 <!--
 .copy	{ text-align: center; font-size: 150% }
 th, td	{ vertical-align: middle }
 tt		{ font-size: 8pt }
 table	{ padding: 2pt }
 -->
 </style>
 </head>
 <body bgcolor="#ffffff">
 <table class="header">
  <tr>
    <td class="icon"><a href="http://www.unicode.org">
    <img align="middle" alt="[Unicode]" border="0" src="http://www.unicode.org/webscripts/logo60s2.gif" width="34" height="33"></a>&nbsp;&nbsp;<a class="bar" href="http://www.unicode.org/ucd">Unicode 
    Character Database</a></td>
  </tr>
  <tr>
    <td class="gray">&nbsp;</td>
  </tr>
 </table>
 <div style="margin:1em">
  <table border="1" cellpadding="0" cellspacing="1" style="border-collapse: collapse" bordercolor="#111111" width="100%" id="AutoNumber1">
    <tr>
      <td width="100%">
      <p style="text-align: right">L2-XXX</p>
      <p><i>To: UTC<br>
      From: Mark Davis<br>
      Date: 2005-04-28</i></p>
      <p><i>One of the original ideas for Unicode 4.1.0 was to produce a NamedSequences.html, 
      following the pattern of StandardizedVariants.html. This document was generated along those 
      lines, but not added into U4.1.0. My suggestion instead is to add this file (with suitable 
      style modifications, of course) as a chart someplace accessible under
      <a href="http://unicode.org/charts/">http://unicode.org/charts/</a>.</i></p>
      <p><i>Alternatively, we could also combine this with the StandardizedVariants.html to provide 
      a unified chart of sequences, again someplace under <a href="http://unicode.org/charts/">
      http://unicode.org/charts/</a>.</i></p>
      <p><i><b>Note:</b> we don&#39;t have some of the glyphs quite right yet, but it should be 
       sufficient for discussing the format. One of the innovations is having a separate column of 
       text for copy&amp;paste; that needs discussion also.</i></td>
    </tr>
  </table>
  <h1><i><font color="#990000">&nbsp;PROPOSED WORKING DRAFT<br>
  </font></i>Named Sequences</h1>
  <table class="wide">
    <tr>
      <td valign="top" width="144">Revision</td>
      <td valign="top">@revision@</td>
    </tr>
    <tr>
      <td valign="top" width="144">Authors</td>
      <td valign="top">Members of the Editorial Committee</td>
    </tr>
    <tr>
      <td valign="top" width="144">Date</td>
      <td valign="top">@date@</td>
    </tr>
    <tr>
      <td valign="top" width="144">This Version</td>
      <td valign="top">
      <a href="http://www.unicode.org/Public/@updateDirectory@/NamedSequences-@revision@.html">
      http://www.unicode.org/Public/@updateDirectory@/NamedSequences-@revision@.html</a></td>
    </tr>
    <tr>
      <td valign="top" width="144">Previous Version</td>
      <td valign="top">n/a</td>
    </tr>
    <tr>
      <td valign="top" width="144">Latest Version</td>
      <td valign="top">n/a</td>
    </tr>
  </table>
  <h3><br>
  <i>Summary</i></h3>
  <blockquote>
    <p>This file provides a visual display of the named sequences derived from NamedSequences.txt.<i>The 
    proposal is to add this, </i></p>
  </blockquote>
  <h3><i>Status</i></h3>
  <blockquote>
    <p><i>The file and the files described herein are part of the
    <a href="http://www.unicode.org/ucd">Unicode Character Database</a> (UCD) and are governed by 
    the <a href="#Terms of Use">UCD Terms of Use</a> stated at the end.</i></p>
  </blockquote>
  <hr width="50%">
  <h2>Introduction</h2>
   <p>The tables here exhaustively list the valid, registered named sequences. The columns include a 
  representative glyph, the sequence of code points in hex, and the name of the sequence. In 
  addition, there is a last column entitled <i>Copyable</i>, which contains the literal text forming 
   the sequence. That text can be copied and pasted elsewhere. The display of the text in this 
  column is up to the capabilities of the browser and the set of available fonts. For more 
  information, see <a href="http://www.unicode.org/help/display_problems.html">Display Problems?</a>.</p>
  <blockquote>
     <p><a name="fonts"><b>Note: </b></a>The representative glyphs used to show the named sequences 
    are often derived from different physical fonts than the representative glyphs in the standard. 
    They may therefore exhibit minor differences in size, proportion, style, or weight.</p>
  </blockquote>
  <p>@table@</p>
  <hr width="50%">
  <h2>UCD <a name="Terms of Use">Terms of Use</a></h2>
  <h3><i>Disclaimer</i></h3>
  <blockquote>
    <p><i>The Unicode Character Database is provided as is by Unicode, Inc. No claims are made as to 
    fitness for any particular purpose. No warranties of any kind are expressed or implied. The 
    recipient agrees to determine applicability of information provided. If this file has been 
    purchased on magnetic or optical media from Unicode, Inc., the sole remedy for any claim will be 
    exchange of defective media within 90 days of receipt.</i></p>
    <p><i>This disclaimer is applicable for all other data files accompanying the Unicode Character 
    Database, some of which have been compiled by the Unicode Consortium, and some of which have 
    been supplied by other sources.</i></p>
  </blockquote>
  <h3><i>Limitations on Rights to Redistribute This Data</i></h3>
  <blockquote>
    <p><i>Recipient is granted the right to make copies in any form for internal distribution and to 
    freely use the information supplied in the creation of products supporting the Unicode<sup>TM</sup> 
    Standard. The files in the Unicode Character Database can be redistributed to third parties or 
    other organizations (whether for profit or not) as long as this notice and the disclaimer notice 
    are retained. Information can be extracted from these files and used in documentation or 
    programs, as long as there is an accompanying notice indicating the source.</i></p>
  </blockquote>
  <hr width="50%">
  <div align="center">
    <center>
    <table cellspacing="0" cellpadding="0" border="0">
      <tr>
        <td><a href="http://www.unicode.org/unicode/copyright.html">
        <img src="http://www.unicode.org/img/hb_notice.gif" border="0" alt="Access to Copyright and terms of use" width="216" height="50"></a></td>
      </tr>
    </table>
    <script language="Javascript" type="text/javascript" src="http://www.unicode.org/webscripts/lastModified.js">
    </script>
    </center>
  </div>
  <blockquote>
  </blockquote>
 </div>
 </body>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCD/NormalizationTestHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/NormalizationTestHeader.txt
@ -1,32 +0,0 @@
 #
 # Normalization Test Suite
 # Format:
 #
 #   Columns (c1, c2,...) are separated by semicolons
 #   Comments are indicated with hash marks
 #
 # CONFORMANCE:
 # 1. The following invariants must be true for all conformant implementations
 #
 #    NFC
 #      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)
 #      c4 ==  NFC(c4) ==  NFC(c5)
 #
 #    NFD
 #      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)
 #      c5 ==  NFD(c4) ==  NFD(c5)
 #
 #    NFKC
 #      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
 #
 #    NFKD
 #      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
 #
 # 2. For every code point X assigned in this version of Unicode that is not specifically
 #    listed in Part 1, the following invariants must be true for all conformant
 #    implementations:
 #
 #      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
 #
@Part0 # Specific cases
 #
--- a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
@ -1,665 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
 * $Date: 2006/09/24 23:32:44 $
 * $Revision: 1.18 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.*;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.text.utility.*;
 import com.sun.java_cup.internal.internal_error;
 /**
 * Implements Unicode Normalization Forms C, D, KC, KD.<br>
 * See UTR#15 for details.<br>
 * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
 * The Unicode Consortium makes no expressed or implied warranty of any
 * kind, and assumes no liability for errors or omissions.
 * No liability is assumed for incidental and consequential damages
 * in connection with or arising out of the use of the information here.
 * @author Mark Davis
 */
 public final class Normalizer implements UCD_Types {
    public static final String copyright =
      "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
    public static boolean SHOW_PROGRESS = false;
    /**
     * Create a normalizer for a given form.
     */
    public Normalizer(byte form, String unicodeVersion) {
        this.form = form;
        this.composition = (form & NF_COMPOSITION_MASK) != 0;
        this.compatibility = (form & NF_COMPATIBILITY_MASK) != 0;
        this.data = getData(unicodeVersion);
    }
    /**
     * Create a normalizer for a given form.
     */
    // public Normalizer(byte form) {
    //    this(form,"");
    //}
    /**
     * Returns the name of a normalization form.
     *
     * @param form index into UCD_Names.NF_NAME
     */
    public static String getName(byte form) {
        final String[] formNames = UCD_Names.NF_NAME;
        return formNames[form];
    }
    /**
     * Returns the name of this normalizer's form.
     */
    public String getName() {
        return getName(this.form);
    }
    /**
     * Returns the UCD version reported by this normalizer's data.
     */
    public String getUCDVersion() {
        final String version = data.getUCDVersion();
        return version;
    }
    /**
     * Returns true if this form composes (its NF_COMPOSITION_MASK bit was set
     * at construction).
     */
    public boolean isComposition() {
        return this.composition;
    }
    /**
     * Returns true if this is a compatibility form (its NF_COMPATIBILITY_MASK
     * bit was set at construction).
     */
    public boolean isCompatibility() {
        return this.compatibility;
    }
    /**
     * Normalizes text according to the chosen form, writing the result into
     * the target buffer: the source is decomposed into target and, for
     * composing forms, target is then composed in place.
     * NOTE(review): for an empty source, target is returned untouched;
     * whether internalDecompose otherwise clears target first is not visible
     * here -- confirm before relying on "replaces contents".
     *
     * @param   source      the original text, unnormalized
     * @param   target      buffer receiving the normalized text
     * @return  target
     */
    public StringBuffer normalize(String source, StringBuffer target) {
        if (source.length() == 0) {
            return target;
        }
        // First decompose the source into target,
        internalDecompose(source, target, true, compatibility);
        // then compose if the form requires.
        if (composition) {
            internalCompose(target);
        }
        return target;
    }
    /**
     * Tests whether canonically decomposing the text gives the same result
     * with the third internalDecompose argument off and on (presumably that
     * flag controls reordering of combining marks -- confirm against
     * internalDecompose). The empty string always passes.
     *
     * @param   source      the text to test
     * @return  true if both decompositions are identical
     */
    public boolean isFCD(String source) {
        if (source.length() == 0) {
            return true;
        }
        StringBuffer plain = new StringBuffer();
        StringBuffer ordered = new StringBuffer();
        internalDecompose(source, plain, false, false);
        internalDecompose(source, ordered, true, false);
        String orderedText = ordered.toString();
        String plainText = plain.toString();
        return orderedText.equals(plainText);
    }
    /**
    * Normalizes text according to the chosen form.
    * @param   source      the original text, unnormalized
    * @return              the resulting normalized text
    */
    public String normalize(String source) {
        StringBuffer normalized = normalize(source, new StringBuffer());
        return normalized.toString();
    }
    /**
    * Normalizes a single code point according to the chosen form.
    * @param   cp          the code point to normalize
    * @return              the resulting normalized text
    */
    public String normalize(int cp) {
        String asString = UTF16.valueOf(cp);
        return normalize(asString);
    }
    /**
    private StringBuffer hasDecompositionBuffer = new StringBuffer();
    public boolean hasDecomposition(int cp) {
        hasDecompositionBuffer.setLength(0);
        normalize(UTF16.valueOf(cp), hasDecompositionBuffer);
        if (hasDecompositionBuffer.length() != 1) return true;
        return cp != hasDecompositionBuffer.charAt(0);
    }
    */
    /**
     * Does a quick check to see if the string is in the current form. Checks canonical order and
     * isAllowed().
     * @param   newLocaleID  source text
     * @return YES, NO, MAYBE
     */
     /*
    public static final int NO = 0, YES = 1, MAYBE = -1;
    public int quickCheck(String source) {
        short lastCanonicalClass = 0;
        int result = YES;
        for (int i = 0; i < source.length(); ++i) {
            char ch = source.charAt(i);
            short canonicalClass = data.getCanonicalClass(ch);
            if (lastCanonicalClass > canonicalClass && canonicalClass != 0) {
                return NO;
            }
            int check = isAllowed(ch);
            if (check == NO) return NO;
            if (check == MAYBE) result = MAYBE;
        }
        return result;
    }
    /**
     * Find whether the given character is allowed in the current form.
     * @return YES, NO, MAYBE
     */
     /*
    public int isAllowed(char ch) {
        if (composition) {
            if (compatibility) {
                if (data.isCompatibilityExcluded(ch)) {
                    return NO;
                }
            } else {
                if (data.isExcluded(ch)) {
                    return NO;
                }
            }
            if (data.isTrailing(ch)) {
                return MAYBE;
            }
        } else { // decomposition: both NFD and NFKD
            if (data.normalizationDiffers(compatibility,ch)) return NO;
        }
        return YES;
    }
    /**
    * Utility: Gets the canonical combining class of a code point from this
    * normalizer's Unicode Character Database data.
    * @param   ch      the source code point
    * @return          the combining class as a short; documented as a value from 0 to 255
    */
    public short getCanonicalClass(int ch) {
        short combiningClass = data.getCanonicalClass(ch);
        return combiningClass;
    }
    /**
    * Utility: Checks whether a single code point is already normalized under this
    * normalizer's form (composing or decomposing, canonical or compatibility).
    * @param   ch      the source code point
    * @return  true unless the normalization data reports that normalizing ch changes it
    */
    public boolean isNormalized(int ch) {
        boolean differs = data.normalizationDiffers(ch, composition, compatibility);
        return !differs;
    }
    /**
    * Utility: Checks whether a string is already normalized under this normalizer's form
    * by normalizing it and comparing the result with the original.
    * @param   s       the source text
    * @return  true if normalizing s leaves it unchanged
    */
    public boolean isNormalized(String s) {
        // Always compare against the full normalization. A per-code-point lookup can
        // only describe one code point, so applying it to the first code point of a
        // longer string would ignore everything after it. Empty and single-code-point
        // strings were already answered by this comparison.
        return s.equals(normalize(s)); // TODO: OPTIMIZE LATER
    }
    /**
    * Utility: Gets the recursive decomposition of a character from the
    * Unicode Character Database, using this normalizer's own compatibility
    * setting (false: canonical only; true: compatibility AND canonical).
    * @param   ch      the source character
    * @param   buffer  buffer handed to the data to receive the decomposition
    */
    public void getRecursiveDecomposition(char ch, StringBuffer buffer) {
        final boolean useCompatibility = compatibility;
        data.getRecursiveDecomposition(ch, buffer, useCompatibility);
    }
    /**
    * Utility: Reports which code points take part in composition by setting bits
    * in the supplied sets. Any of the sets may be null, in which case it is skipped.
    * Sources are the pairwise composition table (key is first << 32 | second),
    * the Jamo range LBase..TLimit, and the Hangul syllable range SBase..SLimit.
    * @param   leading     receives first elements of pairs, leading Jamo and double Hangul
    * @param   trailing    receives second elements of pairs and non-leading Jamo
    * @param   resulting   receives the composite of each pair in the table
    */
    public void getCompositionStatus(BitSet leading, BitSet trailing, BitSet resulting) {
        final boolean wantLeading = leading != null;
        final boolean wantTrailing = trailing != null;
        for (Iterator pairs = data.compTable.keySet().iterator(); pairs.hasNext();) {
            Long pair = (Long) pairs.next();
            Integer composite = (Integer) data.compTable.get(pair);
            long packed = pair.longValue();
            if (wantLeading) {
                leading.set((int)(packed >>> 32));
            }
            if (wantTrailing) {
                trailing.set((int)packed);
            }
            if (resulting != null) {
                resulting.set(composite.intValue());
            }
        }
        for (int jamo = UCD.LBase; jamo < UCD.TLimit; ++jamo) {
            // set all initial Jamo (that form syllables)
            if (wantLeading && UCD.isLeadingJamo(jamo)) {
                leading.set(jamo);
            }
            // set all final Jamo (that form syllables)
            if (wantTrailing && UCD.isNonLeadJamo(jamo)) {
                trailing.set(jamo);
            }
        }
        if (!wantLeading) {
            return;
        }
        for (int syllable = UCD.SBase; syllable < UCD.SLimit; ++syllable) {
            // set all two-Jamo syllables
            if (UCD.isDoubleHangul(syllable)) {
                leading.set(syllable);
            }
        }
    }
    /**
    * Tells whether cp is marked as trailing in the normalization data.
    * Always false when this normalizer does not compose.
    */
    public boolean isTrailing(int cp) {
        if (!this.composition) {
            return false;
        }
        return data.isTrailing(cp);
    }
    /**
    * Tells whether cp is marked as leading in the normalization data.
    * Always false when this normalizer does not compose.
    */
    public boolean isLeading(int cp) {
        if (!this.composition) {
            return false;
        }
        return data.isLeading(cp);
    }
    /**
    * Looks up the pairwise composition of two code points in the normalization data.
    * @param   first   the first (starter) code point
    * @param   second  the following code point
    * @return  the value returned by the data's pairwise lookup
    */
    public int getComposition(int first, int second) {
        int composite = data.getPairwiseComposition(first, second);
        return composite;
    }
    // ======================================
    //                  PRIVATES
    // ======================================
    /**
     * The current form.
     */
    private byte form;
    private boolean composition;
    private boolean compatibility;
    private UnicodeMap substituteMapping;
    /**
    * Decomposes text, either canonical or compatibility, inserting the result
    * into the target buffer. The target is not cleared here: each decomposed
    * code point is inserted at, or before, the current end of the target.
    * A code point that has a non-null entry in substituteMapping (when one is set)
    * contributes that entry instead of its recursive decomposition.
    * @param   source      the original text, unnormalized
    * @param   target      the buffer that receives the decomposed text
    * @param   reorder     if true, a code point with non-zero canonical class is inserted
    *                      before any immediately preceding code points of higher class
    * @param   compat      passed to the data's recursive decomposition: true selects
    *                      compatibility AND canonical decomposition, false canonical only
    */
    private void internalDecompose(String source, StringBuffer target, boolean reorder, boolean compat) {
        StringBuffer buffer = new StringBuffer();
        int ch32;
        // step through source one code point at a time (ch32 is assigned in the body)
        for (int i = 0; i < source.length(); i += UTF16.getCharCount(ch32)) {
            buffer.setLength(0);
            ch32 = UTF16.charAt(source, i);
            String sub = substituteMapping == null ? null : (String) substituteMapping.getValue(ch32);
            if (sub != null) {
                buffer.append(sub);
            } else {
                data.getRecursiveDecomposition(ch32, buffer, compat);
            }
            // add all of the characters in the decomposition.
            // (may be just the original character, if there was
            // no decomposition mapping)
            int ch;
            for (int j = 0; j < buffer.length(); j += UTF16.getCharCount(ch)) {
                ch = UTF16.charAt(buffer, j);
                int chClass = data.getCanonicalClass(ch);
                int k = target.length(); // insertion point
                if (chClass != 0 && reorder) {
                    // bubble-sort combining marks as necessary:
                    // walk k backwards while the preceding code point has a higher class
                    int ch2;
                    for (; k > 0; k -= UTF16.getCharCount(ch2)) {
                        ch2 = UTF16.charAt(target, k-1);
                        if (data.getCanonicalClass(ch2) <= chClass) break;
                    }
                }
                target.insert(k, UTF16.valueOf(ch));
            }
        }
    }
    /**
    * Composes text in place. Target must already
    * have been decomposed.
    * Uses UTF16, which is a utility class for supplementary character support in Java.
    * NOTE(review): the first code point is read unconditionally, so the buffer is
    * presumably never empty when this is called -- confirm against the callers.
    * @param   target      input: decomposed text.
    *                      output: the resulting normalized text.
    */
    private void internalCompose(StringBuffer target) {
        int starterPos = 0;
        int starterCh = UTF16.charAt(target,0);
        int compPos = UTF16.getCharCount(starterCh); // length of last composition
        int lastClass = data.getCanonicalClass(starterCh);
        // fix for strings starting with a combining mark: 256 is never less than a
        // following class (classes are documented in this file as 0 to 255)
        if (lastClass != 0) lastClass = 256;
        int oldLen = target.length();
        // Loop on the decomposed characters, combining where possible
        int ch;
        for (int decompPos = compPos; decompPos < target.length(); decompPos += UTF16.getCharCount(ch)) {
            ch = UTF16.charAt(target, decompPos);
            if (SHOW_PROGRESS) System.out.println(Utility.hex(target)
                + ", decompPos: " + decompPos
                + ", compPos: " + compPos
                + ", ch: " + Utility.hex(ch)
                );
            int chClass = data.getCanonicalClass(ch);
            int composite = data.getPairwiseComposition(starterCh, ch);
            // compose only if the pair has a composite and ch is not blocked:
            // either the previous class is lower than this one, or the previous class is 0
            if (composite != data.NOT_COMPOSITE
            && (lastClass < chClass || lastClass == 0)) {
                UTF16.setCharAt(target, starterPos, composite);
                // we know that we will only be replacing non-supplementaries by non-supplementaries
                // so we don't have to adjust the decompPos
                starterCh = composite;
            } else {
                // no composition: ch is kept; a class-0 character becomes the new starter
                if (chClass == 0) {
                    starterPos = compPos;
                    starterCh  = ch;
                }
                lastClass = chClass;
                // copy ch down to the write position
                UTF16.setCharAt(target, compPos, ch);
                // if that write changed the buffer length, shift the read position to match;
                // note this branch prints to System.out regardless of SHOW_PROGRESS
                if (target.length() != oldLen) { // MAY HAVE TO ADJUST!
                    System.out.println("ADJUSTING: " + Utility.hex(target));
                    decompPos += target.length() - oldLen;
                    oldLen = target.length();
                }
                compPos += UTF16.getCharCount(ch);
            }
        }
        // drop whatever remains after the last kept code point
        target.setLength(compPos);
    }
    /**
     * Normalization data for one version of the Unicode Character Database.
     * Wraps a UCD instance and holds the pairwise composition table and the
     * bit sets that the constructor derives from it.
     */
    static class Stub {
        private UCD ucd;
        // key: Long holding (first << 32) | second; value: Integer composite code point
        private HashMap compTable = new HashMap();
        // second elements of composition pairs, plus non-leading Jamo
        private BitSet isSecond = new BitSet();
        // first elements of composition pairs, plus code points for which
        // ucd.isLeadingJamoComposition is true
        private BitSet isFirst = new BitSet();
        // code points whose canonical decomposition was entered into compTable
        private BitSet canonicalRecompose = new BitSet();
        // subset of canonicalRecompose whose two components both have
        // decomposition type <= CANONICAL
        private BitSet compatibilityRecompose = new BitSet();
        /** Returned by getPairwiseComposition when the pair does not compose. */
        static final int NOT_COMPOSITE = 0xFFFF;
        /**
         * Loads the UCD for the given version and scans every code point to fill the
         * composition table and the bit sets.
         * @param version  Unicode version string passed to UCD.make
         * @throws ChainException wrapping any exception raised while processing a code point
         */
        Stub(String version) {
            ucd = UCD.make(version);
            // inclusive bound: 0x10FFFF is the last code point
            for (int i = 0; i <= 0x10FFFF; ++i) {
                if (!ucd.isAssigned(i)) continue;
                if (ucd.isPUA(i)) continue;
                if (ucd.isNonLeadJamo(i)) isSecond.set(i);
                if (ucd.isLeadingJamoComposition(i)) isFirst.set(i);
                byte dt = ucd.getDecompositionType(i);
                if (dt != CANONICAL) continue;
                if (!ucd.getBinaryProperty(i, CompositionExclusion)) {
                    try {
                        String s = ucd.getDecompositionMapping(i);
                        int len = UTF16.countCodePoint(s);
                        if (len != 2) {
                            if (len > 2) {
                                // NOTE(review): this is a lexicographic string comparison, so a
                                // two-digit major version (e.g. "10.0.0") sorts before "3.0.0" --
                                // confirm whether such versions can reach this code.
                                if (ucd.getVersion().compareTo("3.0.0") >= 0) {
                                    throw new IllegalArgumentException("BAD LENGTH: " + len + ucd.toString(i));
                                }
                            }
                            continue;
                        }
                        int a = UTF16.charAt(s, 0);
                        // only pairs that begin with a class-0 character are recorded
                        if (ucd.getCombiningClass(a) != 0) continue;
                        isFirst.set(a);
                        int b = UTF16.charAt(s, UTF16.getCharCount(a));
                        isSecond.set(b);
                        // have a recomposition, so set the bit
                        canonicalRecompose.set(i);
                        // set the compatibility recomposition bit
                        // ONLY if the component characters
                        // don't compatibility decompose
                        if (ucd.getDecompositionType(a) <= CANONICAL
                         && ucd.getDecompositionType(b) <= CANONICAL) {
                            compatibilityRecompose.set(i);
                        }
                        long key = (((long)a)<<32) | b;
                        compTable.put(new Long(key), new Integer(i));
                    } catch (Exception e) {
                        throw new ChainException("Error: {0}", new Object[]{ucd.toString(i)}, e);
                    }
                }
            }
            // TODO (from the original author, never implemented): process
            // compatibilityRecompose in a second pass over compTable, since we don't
            // know whether the pieces are allowable until we have processed all the
            // characters.
        }
        /** Returns the version string reported by the wrapped UCD. */
        String getUCDVersion() {
            return ucd.getVersion();
        }
        /*
         NOTE(review): the list below was left by the original author; it looks like saved
         output from a check of normalizationDiffers against actual normalization -- confirm.
 Problem: differs: true, call: false U+0385 GREEK DIALYTIKA TONOS
 Problem: differs: true, call: false U+03D3 GREEK UPSILON WITH ACUTE AND HOOK SYMBOL
 Problem: differs: true, call: false U+03D4 GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL
 Problem: differs: true, call: false U+1E9B LATIN SMALL LETTER LONG S WITH DOT ABOVE
 Problem: differs: true, call: false U+1FC1 GREEK DIALYTIKA AND PERISPOMENI
 Problem: differs: true, call: false U+1FCD GREEK PSILI AND VARIA
 Problem: differs: true, call: false U+1FCE GREEK PSILI AND OXIA
 Problem: differs: true, call: false U+1FCF GREEK PSILI AND PERISPOMENI
 Problem: differs: true, call: false U+1FDD GREEK DASIA AND VARIA
 Problem: differs: true, call: false U+1FDE GREEK DASIA AND OXIA
 Problem: differs: true, call: false U+1FDF GREEK DASIA AND PERISPOMENI
 Problem: differs: true, call: false U+1FED GREEK DIALYTIKA AND VARIA
 */
        /** Returns the combining class of cp as reported by the UCD. */
        short getCanonicalClass(int cp) {
            return ucd.getCombiningClass(cp);
        }
        /** Tells whether cp was recorded as a second element (or is a non-leading Jamo). */
        boolean isTrailing(int cp) {
            return isSecond.get(cp);
        }
        /** Tells whether cp was recorded as a first element of a composition. */
        boolean isLeading(int cp) {
            return isFirst.get(cp);
        }
        /**
         * Tells, from the decomposition type alone, whether normalizing cp changes it.
         * @param cp           the code point
         * @param composition  true for the composing forms
         * @param compat       true for the compatibility forms
         */
        boolean normalizationDiffers(int cp, boolean composition, boolean compat) {
            byte dt = ucd.getDecompositionType(cp);
            if (!composition) {
                if (compat) return dt >= CANONICAL;
                else return dt == CANONICAL;
            } else {
                // almost the same, except that we add back in the characters
                // that RECOMPOSE
                if (compat) return dt >= CANONICAL && !compatibilityRecompose.get(cp);
                else return dt == CANONICAL && !canonicalRecompose.get(cp);
            }
        }
        /**
         * Appends the recursive decomposition of cp to buffer.
         * @param cp      the code point to decompose
         * @param buffer  receives the fully decomposed code points, appended at the end
         * @param compat  true to follow compatibility as well as canonical mappings
         */
        public void getRecursiveDecomposition(int cp, StringBuffer buffer, boolean compat) {
            byte dt = ucd.getDecompositionType(cp);
            // we know we decompose all CANONICAL, plus > CANONICAL if compat is TRUE.
            if (dt == CANONICAL || dt > CANONICAL && compat) {
                String s = ucd.getDecompositionMapping(cp);
                if (s.equals(UTF16.valueOf(cp))) {
                    // A mapping to the character itself would recurse without end;
                    // treat it as having no decomposition.
                    UTF16.append(buffer, cp);
                    return;
                }
                int part;
                for (int i = 0; i < s.length(); i += UTF16.getCharCount(part)) {
                    part = UTF16.charAt(s, i);
                    getRecursiveDecomposition(part, buffer, compat);
                }
            } else {
                UTF16.append(buffer, cp);
            }
        }
        /**
         * Returns the composite of the pair, trying Hangul composition first and then
         * the composition table.
         * @return the composite code point, or NOT_COMPOSITE if the pair does not compose
         */
        int getPairwiseComposition(int starterCh, int ch) {
            int hangulPoss = UCD.composeHangul(starterCh, ch);
            // the original compared against the literal 0xFFFF, which is NOT_COMPOSITE
            if (hangulPoss != NOT_COMPOSITE) return hangulPoss;
            Object obj = compTable.get(new Long((((long)starterCh)<<32) | ch));
            if (obj == null) return NOT_COMPOSITE;
            return ((Integer)obj).intValue();
        }
    }
    /**
    * Contains normalization data from the Unicode Character Database.
    * use false for the minimal set, true for the real set.
    */
    private Stub data;
    private static HashMap versionCache = new HashMap();
    /**
    * Returns the cached Stub for a Unicode version, creating and caching it on first use.
    * @param   version  the Unicode version; the empty string selects UCD.latestVersion
    * @return  the Stub stored in versionCache for that version
    */
    private static Stub getData (String version) {
        String key = version.length() == 0 ? UCD.latestVersion : version;
        Object cached = versionCache.get(key);
        if (cached != null) {
            return (Stub) cached;
        }
        Stub created = new Stub(key);
        versionCache.put(key, created);
        return created;
    }
    /**
    * Returns the substitute mapping consulted during decomposition, or null if none is set.
    */
    public UnicodeMap getSubstituteMapping() {
        return this.substituteMapping;
    }
    /**
    * Sets the substitute mapping consulted during decomposition.
    * @param   substituteMapping   the new mapping; null disables substitution
    * @return  this normalizer, so that calls can be chained
    */
    public Normalizer setSubstituteMapping(UnicodeMap substituteMapping) {
        Normalizer self = this;
        self.substituteMapping = substituteMapping;
        return self;
    }
    static UnicodeMap spacingMap;;
    /**
    * Installs the shared spacing map as this normalizer's substitute mapping,
    * building the map first if it has not been built yet.
    */
    public void setSpacingSubstitute() {
        boolean needsBuild = (spacingMap == null);
        if (needsBuild) makeSpacingMap();
        setSubstituteMapping(spacingMap);
    }
    /**
    * Builds the shared spacing map and stores it in the static spacingMap field.
    * First, every code point that has a decomposition, whose full compatibility
    * decomposition is more than one char long, starts with SPACE (U+0020) or U+0640,
    * and otherwise contains only Mn or Me characters, is mapped to itself.
    * Then the hand-written entries in the specials table overwrite some of those mappings.
    * The finished map is frozen before it is published.
    * NOTE(review): the map is static but is built from this instance's data, so it
    * reflects the Unicode version of whichever normalizer triggers the build -- confirm
    * that this is intended.
    * @throws InternalError if a specials entry names a character that was not already mapped
    */
    private void makeSpacingMap() {
        // Build into a local and publish only once complete, so that a failure part-way
        // through cannot leave a half-built, unfrozen map behind in the static field.
        UnicodeMap map = new UnicodeMap();
        StringBuffer b = new StringBuffer();
        main:
        for (int i = 0; i <= 0x10FFFF; ++i) {
            // true for canonical as well as compatibility decompositions
            boolean hasDecomposition = data.ucd.getDecompositionType(i) >= data.ucd.CANONICAL;
            if (!hasDecomposition) continue;
            b.setLength(0);
            data.getRecursiveDecomposition(i, b, true);
            if (b.length() == 1) continue;
            char firstChar = b.charAt(0);
            if (firstChar != 0x20 && firstChar != '\u0640') continue;
            // if rest are just Mn or Me marks, then add to substitute mapping
            int cp;
            for (int j = 1; j < b.length(); j += UTF16.getCharCount(cp)) {
                cp = UTF16.charAt(b,j);
                int cat = data.ucd.getCategory(cp);
                if (cat != data.ucd.Mn && cat != data.ucd.Me) continue main;
            }
            map.put(i, UTF16.valueOf(i));
        }
        // each row: { UnicodeSet pattern of source characters, replacement string }
        String[][] specials = {
                {"[\\u0384\\u1FFD]", "\u00B4"},
                {"[\\uFFE3]", "\u00AF"},
                {"[\\uFE49-\\uFE4C]", "\u203E"},
                {"[\\u1FED]", "\u00A8\u0300"},
                {"[\\u1FEE\\u0385]", "\u00A8\u0301"},
                {"[\\u1FC1]", "\u00A8\u0342"},
                {"[\\u1FBD]", "\u1FBF"},
                {"[\\u1FCD]", "\u1FBF\u0300"},
                {"[\\u1FCE]", "\u1FBF\u0301"},
                {"[\\u1FCF]", "\u1FBF\u0342"},
                {"[\\u1FDD]", "\u1FFE\u0300"},
                {"[\\u1FDE]", "\u1FFE\u0301"},
                {"[\\u1FDF]", "\u1FFE\u0342"},
                {"[\\uFC5E]", "\uFE72\u0651"},
                {"[\\uFC5F]", "\uFE74\u0651"},
                {"[\\uFC60]", "\uFE76\u0651"},
                {"[\\uFC61]", "\uFE78\u0651"},
                {"[\\uFC62]", "\uFE7A\u0651"},
                {"[\\uFC63]", "\uFE7C\u0670"},
                {"[\\uFCF2]", "\uFE77\u0651"},
                {"[\\uFCF3]", "\uFE79\u0651"},
                {"[\\uFCF4]", "\uFE7B\u0651"},
            };
        UnicodeSet mappedChars = map.keySet();
        for (int i = 0; i < specials.length; ++i) {
            UnicodeSet source = new UnicodeSet(specials[i][0]);
            // every special must refine a mapping found by the scan above
            if (!mappedChars.containsAll(source)) {
                throw new InternalError("Remapping character that doesn't need it!" + source);
            }
            map.putAll(source, specials[i][1]);
        }
        map.freeze();
        spacingMap = map;
    }
    /**
    * Just accessible for testing.
    */
    /*
    boolean isExcluded (char ch) {
        return data.isExcluded(ch);
    }
    /**
    * Just accessible for testing.
    */
    /*
    String getRawDecompositionMapping (char ch) {
        return data.getRawDecompositionMapping(ch);
    }
    //*/
 }
--- a/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
+++ b/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
@ -1,349 +0,0 @@
 package com.ibm.text.UCD;
 import java.util.*;
 import com.ibm.text.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.text.utility.*;
 /**
 * Implements Unicode Normalization Forms C, D, KC, KD.<br>
 * See UTR#15 for details.<br>
 * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
 * The Unicode Consortium makes no expressed or implied warranty of any
 * kind, and assumes no liability for errors or omissions.
 * No liability is assumed for incidental and consequential damages
 * in connection with or arising out of the use of the information here.
 * @author Mark Davis
 */
 public class NormalizerSample implements UCD_Types {
    static final String copyright = "Copyright (C) 2001, IBM Corp. and Unicode Inc. All Rights Reserved.";
    public static boolean SHOW_PROGRESS = false;
    /**
     * Create a normalizer for a given form.
     */
    public NormalizerSample(byte form, String unicodeVersion) {
        this.composition = (form & COMPOSITION_MASK) != 0;
        this.compatibility = (form & COMPATIBILITY_MASK) != 0;
        this.data = getData(unicodeVersion);
    }
    /**
     * Create a normalizer for a given form.
     */
    public NormalizerSample(byte form) {
        this(form,"");
    }
    /**
    * Masks for the form selector
    */
    public static final byte
        COMPATIBILITY_MASK = 1,
        COMPOSITION_MASK = 2;
    /**
    * Normalization Form Selector
    */
    public static final byte
        NFD = 0 ,
        NFKD = COMPATIBILITY_MASK,
        NFC = COMPOSITION_MASK,
        NFKC = (byte)(COMPATIBILITY_MASK + COMPOSITION_MASK);
    /**
    * Normalizes text according to the chosen form,
    * replacing contents of the target buffer.
    * @param   source      the original text, unnormalized
    * @param   target      the resulting normalized text
    */
    public StringBuffer normalize(String source, StringBuffer target) {
        // First decompose the source into target,
        // then compose if the form requires.
        if (source.length() != 0) {
            internalDecompose(source, target);
            if (composition) {
                internalCompose(target);
            }
        }
        return target;
    }
    /**
    * Normalizes text according to the chosen form
    * @param   source      the original text, unnormalized
    * @return  target      the resulting normalized text
    */
    public String normalize(String source) {
        return normalize(source, new StringBuffer()).toString();
    }
    /**
    * Normalizes text according to the chosen form
    * @param   newLocaleID      the original text, unnormalized
    * @return  target      the resulting normalized text
    */
    public String normalize(int cp) {
        return normalize(UTF16.valueOf(cp));
    }
    /**
    */
    private StringBuffer hasDecompositionBuffer = new StringBuffer();
    public boolean hasDecomposition(int cp) {
        hasDecompositionBuffer.setLength(0);
        normalize(UTF16.valueOf(cp), hasDecompositionBuffer);
        if (hasDecompositionBuffer.length() != 1) return true;
        return cp != hasDecompositionBuffer.charAt(0);
    }
    /**
    * Utility: Checks whether there is a recursive decomposition of a character from the
    * Unicode Character Database. It is compatibility or canonical according to the particular
    * normalizer.
    * @param   ch      the source character
    */
    public boolean normalizationDiffers(int ch) {
        return data.normalizationDiffers(ch, composition, compatibility);
    }
    /**
    * Utility: Gets recursive decomposition of a character from the
    * Unicode Character Database.
    * @param   compatibility    If false selects the recursive
    *                  canonical decomposition, otherwise selects
    *                  the recursive compatibility AND canonical decomposition.
    * @param   ch      the source character
    * @param   buffer  buffer to be filled with the decomposition
    */
    public void getRecursiveDecomposition(char ch, StringBuffer buffer) {
        data.getRecursiveDecomposition(ch, buffer, compatibility);
    }
    // ======================================
    //                  PRIVATES
    // ======================================
    /**
     * The current form.
     */
    private boolean composition;
    private boolean compatibility;
    /**
    * Decomposes text, either canonical or compatibility,
    * replacing contents of the target buffer.
    * @param   form        the normalization form. If COMPATIBILITY_MASK
    *                      bit is on in this byte, then selects the recursive
    *                      compatibility decomposition, otherwise selects
    *                      the recursive canonical decomposition.
    * @param   source      the original text, unnormalized
    * @param   target      the resulting normalized text
    */
    private void internalDecompose(String source, StringBuffer target) {
        StringBuffer buffer = new StringBuffer();
        int ch32;
        for (int i = 0; i < source.length(); i += UTF16.getCharCount(ch32)) {
            buffer.setLength(0);
            ch32 = UTF16.charAt(source, i);
            data.getRecursiveDecomposition(ch32, buffer, compatibility);
            // add all of the characters in the decomposition.
            // (may be just the original character, if there was
            // no decomposition mapping)
            int ch;
            for (int j = 0; j < buffer.length(); j += UTF16.getCharCount(ch)) {
                ch = UTF16.charAt(buffer, j);
                int chClass = data.getCanonicalClass(ch);
                int k = target.length(); // insertion point
                if (chClass != 0) {
                    // bubble-sort combining marks as necessary
                    int ch2;
                    for (; k > 0; k -= UTF16.getCharCount(ch2)) {
                        ch2 = UTF16.charAt(target, k-1);
                        if (data.getCanonicalClass(ch2) <= chClass) break;
                    }
                }
                target.insert(k, UTF16.valueOf(ch));
            }
        }
    }
    /**
    * Composes text in place. Target must already
    * have been decomposed.
    * Uses UTF16, which is a utility class for supplementary character support in Java.
    * @param   target      input: decomposed text.
    *                      output: the resulting normalized text.
    */
    private void internalCompose(StringBuffer target) {
        int starterPos = 0;
        int starterCh = UTF16.charAt(target,0);
        int compPos = UTF16.getCharCount(starterCh); // length of last composition
        int lastClass = data.getCanonicalClass(starterCh);
        if (lastClass != 0) lastClass = 256; // fix for strings staring with a combining mark
        int oldLen = target.length();
        // Loop on the decomposed characters, combining where possible
        int ch;
        for (int decompPos = compPos; decompPos < target.length(); decompPos += UTF16.getCharCount(ch)) {
            ch = UTF16.charAt(target, decompPos);
            if (SHOW_PROGRESS) System.out.println(Utility.hex(target)
                + ", decompPos: " + decompPos
                + ", compPos: " + compPos
                + ", ch: " + Utility.hex(ch)
                );
            int chClass = data.getCanonicalClass(ch);
            int composite = data.getPairwiseComposition(starterCh, ch);
            if (composite != data.NOT_COMPOSITE
            && (lastClass < chClass || lastClass == 0)) {
                UTF16.setCharAt(target, starterPos, composite);
                // we know that we will only be replacing non-supplementaries by non-supplementaries
                // so we don't have to adjust the decompPos
                starterCh = composite;
            } else {
                if (chClass == 0) {
                    starterPos = compPos;
                    starterCh  = ch;
                }
                lastClass = chClass;
                UTF16.setCharAt(target, compPos, ch);
                if (target.length() != oldLen) { // MAY HAVE TO ADJUST!
                    System.out.println("ADJUSTING: " + Utility.hex(target));
                    decompPos += target.length() - oldLen;
                    oldLen = target.length();
                }
                compPos += UTF16.getCharCount(ch);
            }
        }
        target.setLength(compPos);
    }
    // The following class makes use of the UCD class, which accesses data in the Unicode Character Database
    static class Stub {
        private UCD ucd;
        private HashMap compTable = new HashMap();
        private BitSet isSecond = new BitSet();
        private BitSet canonicalRecompose = new BitSet();
        private BitSet compatibilityRecompose = new BitSet();
        static final int NOT_COMPOSITE = 0xFFFF;
        Stub(String version) {
            ucd = UCD.make(version);
            for (int i = 0; i < 0x10FFFF; ++i) {
                if (!ucd.isAssigned(i)) continue;
                if (ucd.isPUA(i)) continue;
                if (ucd.isNonLeadJamo(i)) isSecond.set(i);
                byte dt = ucd.getDecompositionType(i);
                if (dt != CANONICAL) continue;
                if (!ucd.getBinaryProperty(i, CompositionExclusion)) {
                    try {
                        String s = ucd.getDecompositionMapping(i);
                        int len = UTF16.countCodePoint(s);
                        if (len != 2) {
                            if (len > 2) throw new IllegalArgumentException("BAD LENGTH: " + len + ucd.toString(i));
                            continue;
                        }
                        int a = UTF16.charAt(s, 0);
                        if (ucd.getCombiningClass(a) != 0) continue;
                        int b = UTF16.charAt(s, UTF16.getCharCount(a));
                        isSecond.set(b);
                        // have a recomposition, so set the bit
                        canonicalRecompose.set(i);
                        // set the compatibility recomposition bit
                        // ONLY if the component characters
                        // don't compatibility decompose
                        if (ucd.getDecompositionType(a) <= CANONICAL
                         && ucd.getDecompositionType(b) <= CANONICAL) {
                            compatibilityRecompose.set(i);
                         }
                        long key = (((long)a)<<32) | b;
                        compTable.put(new Long(key), new Integer(i));
                    } catch (Exception e) {
                        throw new ChainException("Error: {0}", new Object[]{ucd.toString(i)}, e);
                    }
                }
            }
        }
        short getCanonicalClass(int cp) {
            return ucd.getCombiningClass(cp);
        }
        boolean isTrailing(int cp) {
            return isSecond.get(cp);
        }
        boolean normalizationDiffers(int cp, boolean composition, boolean compatibility) {
            byte dt = ucd.getDecompositionType(cp);
            if (!composition) {
                if (compatibility) return dt >= CANONICAL;
                else return dt == CANONICAL;
            } else {
                // almost the same, except that we add back in the characters
                // that RECOMPOSE
                if (compatibility) return dt >= CANONICAL && !compatibilityRecompose.get(cp);
                else return dt == CANONICAL && !canonicalRecompose.get(cp);
            }
        }
        public void getRecursiveDecomposition(int cp, StringBuffer buffer, boolean compatibility) {
            byte dt = ucd.getDecompositionType(cp);
            // we know we decompose all CANONICAL, plus > CANONICAL if compatibility is TRUE.
            if (dt == CANONICAL || dt > CANONICAL && compatibility) {
                String s = ucd.getDecompositionMapping(cp);
                for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
                    cp = UTF16.charAt(s, i);
                    getRecursiveDecomposition(cp, buffer, compatibility);
                }
            } else {
                UTF16.append(buffer, cp);
            }
        }
        int getPairwiseComposition(int starterCh, int ch) {
            int hangulPoss = UCD.composeHangul(starterCh, ch);
            if (hangulPoss != 0xFFFF) return hangulPoss;
            Object obj = compTable.get(new Long((((long)starterCh)<<32) | ch));
            if (obj == null) return 0xFFFF;
            return ((Integer)obj).intValue();
        }
    }
    /**
    * Contains normalization data from the Unicode Character Database.
    * use false for the minimal set, true for the real set.
    */
    private Stub data;
    private static HashMap versionCache = new HashMap();
    private static Stub getData (String version) {
        if (version.length() == 0) version = UCD.latestVersion;
        Stub result = (Stub)versionCache.get(version);
        if (result == null) {
            result = new Stub(version);
            versionCache.put(version, result);
        }
        return result;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/OldUnicodeMap.java
+++ b/tools/unicodetools/com/ibm/text/UCD/OldUnicodeMap.java
@ -1,109 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/OldUnicodeMap.java,v $
 * $Date: 2005/03/04 02:50:26 $
 * $Revision: 1.1 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 /**
 * Class that maps from codepoints to an index, and optionally a label.
 * <p>
 * Entries live in two parallel arrays, <code>sets</code> and <code>labels</code>;
 * an entry's index is its position in those arrays. Lookups scan from the
 * highest index downwards, so when sets overlap the highest index wins.
 * The arrays grow on demand, and slots that were never populated (possible
 * after {@link #put(int, int)} with an index beyond the current size) are
 * skipped by the lookup methods.
 */
 public class OldUnicodeMap {
    UnicodeSet[] sets = new UnicodeSet[50];
    String[] labels = new String[50];
    int count = 0;

    /**
     * Adds a labelled set, removing collisions from the new set and printing a
     * warning for each overlap.
     *
     * @return the index assigned to the new entry
     */
    public int add(String label, UnicodeSet set) {
        return add(label, set, false, true);
    }

    /**
     * Add set
     *@param label label stored alongside the set
     *@param set the set to add; it is stored by reference, not copied.
     * NOTE(review): UnicodeSet.removeAll appears to modify its receiver in
     * place, so with removeOld == false the caller's set is altered -- confirm.
     *@param removeOld true: remove any collisions from sets already in the map
     * if false, remove any collisions from this set
     *@param signal: print a warning when collisions occur
     *@return the index assigned to the new entry
     */
    public int add(String label, UnicodeSet set, boolean removeOld, boolean signal) {
        ensureCapacity(count + 1);
        // remove from any preceding!!
        for (int i = 0; i < count; ++i) {
            // Unpopulated slots (see put) have nothing to collide with.
            if (sets[i] == null || !set.containsSome(sets[i])) continue;
            if (signal) showOverlap(label, set, i);
            if (removeOld) {
                sets[i] = sets[i].removeAll(set);
            } else {
                set = set.removeAll(sets[i]);
            }
        }
        sets[count] = set;
        labels[count++] = label;
        return count - 1;
    }

    /**
     * Prints a warning to System.out listing the code points shared by
     * <code>set</code> and the entry at index <code>i</code>.
     */
    public void showOverlap(String label, UnicodeSet set, int i) {
        UnicodeSet delta = new UnicodeSet(set).retainAll(sets[i]);
        System.out.println("Warning! Overlap with " + label + " and " + labels[i]
            + ": " + delta);
    }

    /**
     * @return the highest index whose set contains the codepoint, or -1 if none does
     */
    public int getIndex(int codepoint) {
        for (int i = count - 1; i >= 0; --i) {
            if (sets[i] != null && sets[i].contains(codepoint)) return i;
        }
        return -1;
    }

    /**
     * @return the highest index whose label equals <code>label</code>,
     * ignoring case, or -1 if there is none
     */
    public int getIndexFromLabel(String label) {
        for (int i = count - 1; i >= 0; --i) {
            // Entries created through put() have no label until setLabel is called.
            if (labels[i] != null && labels[i].equalsIgnoreCase(label)) return i;
        }
        return -1;
    }

    /**
     * @return the label of the entry containing the codepoint, or null if
     * no entry contains it
     */
    public String getLabel(int codepoint) {
        return getLabelFromIndex(getIndex(codepoint));
    }

    /**
     * @return the label at <code>index</code>, or null if the index is out of range
     */
    public String getLabelFromIndex(int index) {
        if (index < 0 || index >= count) return null;
        return labels[index];
    }

    /**
     * @return a copy of the set at <code>index</code>, or null if the index is
     * out of range or the slot was never populated
     */
    public UnicodeSet getSetFromIndex(int index) {
        if (index < 0 || index >= count) return null;
        if (sets[index] == null) return null;
        return new UnicodeSet(sets[index]); // protect from changes
    }

    /**
     * @return the number of index slots in use (one more than the highest index)
     */
    public int size() {
        return count;
    }

    /**
     * Replaces the label stored at <code>index</code>.
     *
     * @return <code>index</code>
     */
    public int setLabel(int index, String label) {
        labels[index] = label;
        return index;
    }

    /**
     * Adds a single codepoint to the set at <code>index</code>, creating the
     * set (and extending the size to cover it) if the slot is empty.
     *
     * @return <code>index</code>
     */
    public int put(int codepoint, int index) {
        ensureCapacity(index + 1);
        if (sets[index] == null) {
            sets[index] = new UnicodeSet();
            if (index >= count) count = index + 1;
        }
        sets[index].add(codepoint);
        return index;
    }

    /**
     * Grows the backing arrays so that they hold at least <code>needed</code> slots.
     */
    private void ensureCapacity(int needed) {
        if (needed > sets.length) {
            UnicodeSet[] grownSets = new UnicodeSet[Math.max(needed, 2 * sets.length)];
            System.arraycopy(sets, 0, grownSets, 0, sets.length);
            sets = grownSets;
        }
        if (needed > labels.length) {
            String[] grownLabels = new String[Math.max(needed, 2 * labels.length)];
            System.arraycopy(labels, 0, grownLabels, 0, labels.length);
            labels = grownLabels;
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/ProcessUnihan.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ProcessUnihan.java
@ -1,76 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ProcessUnihan.java,v $
 * $Date: 2005/03/04 02:50:26 $
 * $Revision: 1.3 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.*;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import java.util.*;
 // stub file, ignore
 /**
 * Placeholder class. It declares no members: everything between the braces
 * is inside a single block comment and is therefore not compiled. The
 * commented-out text is a draft that opens the "Unihan" data file via
 * Utility.openUnicodeFile and reads it line by line.
 */
 public final class ProcessUnihan {
 	/*
    static final boolean TESTING = false;
    static int type;
    public static void main() {
        try {
            type = 0;
            System.out.println("Starting");
            process();
        } catch (Exception e) {
            System.out.println("Exception: " + e);
        }
    }
    static PrintWriter out;
    static PrintWriter err;
    static int count;
    static int oldLine;
    static Map map = new HashMap();
    static Map tags = new HashMap();
    static void process() throws java.io.IOException {
        int lineCounter = 0;
        String[] parts = new String[3];
        //out = Utility.openPrintWriter("Transliterate_Han_English.txt");
        //err = Utility.openPrintWriter("Transliterate_Han_English.log.txt");
        BufferedReader in = Utility.openUnicodeFile("Unihan", "3.2.0", true, Utility.UTF8);
        while (true) {
            Utility.dot(++lineCounter);
            String line = in.readLine();
            if (line == null) break;
            int commentPos = line.indexOf('#');
            if (commentPos >= 0) line = line.substring(0,commentPos);
            line = line.trim();
            if (line.length() == 0) continue;
            int count = Utility.split(line, '#', parts);
            int code = Integer.parseInt(parts[0].substring(2), 16);
            Byte itag = (Byte) tags.get("a");
            if (itag == null) {}
            String tag = parts[1];
            String value = parts[2];
            if (tags.containsKey(tag)) {}
        }
    }
    */
 }
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyAliasesHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyAliasesHeader.txt
@ -1,41 +0,0 @@
 #
 # This file contains aliases for properties used in the UCD.
 # These names can be used for XML formats of UCD data, for regular-expression
 # property tests, and other programmatic textual descriptions of Unicode data.
 # For information on which properties are normative, see UCD.html.
 #
 # The names may be translated in appropriate environments, and additional
 # aliases may be useful.
 #
 # FORMAT
 #
 # Each line has two or more fields, separated by semicolons.
 #
 # First Field: The first field is an abbreviated name for the property.
 #
 # Second Field: The second field is a long name.
 #
 # The above are the preferred aliases. Other aliases may be listed in additional fields.
 #
 # Loose matching should be applied to all property names and property values, with
 # the exception of String Property values. With loose matching of property names and
 # values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
 # values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
 #
 # NOTE: Property value names are NOT unique across properties. For example:
 #
 #   AL means Arabic Letter for the Bidi_Class property, and
 #   AL means Alpha_Left for the Combining_Class property, and
 #   AL means Alphabetic for the Line_Break property.
 #
 # In addition, some property names may be the same as some property value names.
 # For example:
 #
 #   sc means the Script property, and
 #   Sc means the General_Category property value Currency_Symbol (Sc)
 #
 # The combination of property value and property name is, however, unique.
 #
 # For more information, see UTS #18: Regular Expression Guidelines
 # ================================================
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java
@ -1,248 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/PropertyLister.java,v $
 * $Date: 2003/03/19 17:30:56 $
 * $Revision: 1.11 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.io.*;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UnicodeSet;
 import java.text.NumberFormat;
 /**
 * Base class for writing a listing of code points to {@link #output}, one line
 * per single code point or range. Subclasses classify each code point through
 * {@link #status(int)} and may override the naming hooks ({@link #valueName},
 * {@link #optionalName}, {@link #optionalComment}, ...) to decorate the lines.
 */
 abstract public class PropertyLister implements UCD_Types {
    static final boolean COMPRESS_NAMES = false;
    static final boolean DROP_INDICATORS = true;
    protected UCD ucdData;
    protected PrintWriter output;
    protected boolean showOnConsole;
    protected boolean usePropertyComment = true;
    protected boolean breakByCategory = true;
    protected int firstRealCp = -2;
    protected int lastRealCp = -2;
    protected boolean alwaysBreaks = false; // set to true if property only breaks
    protected boolean commentOut = false;
    protected boolean useKenName = true; // set to false to get meaningful names
    // Accumulates every range passed to format() since the last print() call.
    private UnicodeSet set = new UnicodeSet();
    /*
     * Values returned by status(), as interpreted by print():
     *   INCLUDE  - the code point joins (or starts) the current run
     *   BREAK    - the current run is written out and a new run starts here
     *   CONTINUE - the code point is ignored; the current run stays open
     *   EXCLUDE  - the current run is written out and no run is open afterwards
     */
    public static final byte INCLUDE = 0, BREAK = 1, CONTINUE = 2, EXCLUDE = 3;
    /**
     * @return status. Also have access to firstRealCp, lastRealCp
     */
    abstract public byte status(int cp);
    /**
     * @return text written (followed by a blank line) before the listing;
     * empty for no header
     */
    public String headerString() {
        return "";
    }
    /**
     * @return value shown after the code point(s), prefixed with "; " by
     * format(); empty for none
     */
    public String valueName(int cp) {
        return "";
    }
    /**
     * @return name used in the "No values for" message when nothing is listed
     */
    public String missingValueName() {
        return "";
    }
    /**
     * @return extra field shown after the value, prefixed with "; " by
     * format(); empty for none
     */
    public String optionalName(int cp) {
        return "";
    }
    /**
     * @return comment text placed at the start of the trailing comment: the
     * modified-category ID of the code point, or empty when
     * usePropertyComment is false
     */
    public String optionalComment(int cp) {
        if (!usePropertyComment) return "";
        return ucdData.getModCatID_fromIndex(getModCat(cp));
    }
    /**
     * @return minimum width of the value/optional-name columns; format() pads
     * with spaces up to this width
     */
    public int minPropertyWidth() {
        return 1;
    }
    /**
     * Writes one line describing the run startCp..endCp to the output (and to
     * the console when showOnConsole is set).
     *
     * @param startCp first code point of the run
     * @param endCp last code point of the run; equal to startCp for a single value
     * @param realCount number of code points in the run that were actually
     * counted; when smaller than the span, the difference is shown after a "/"
     * @throws ChainException wrapping any exception raised while building the line
     */
    public void format(int startCp, int endCp, int realCount) {
        try {
            set.add(startCp, endCp);
            String prop = valueName(startCp);
            String opt = "";
            String optCom = "";
            String commentSep = " # ";
            // When the whole line is commented out it gets one leading "# " instead.
            if (commentOut) commentSep = "";
            if (prop.length() > 0) prop = "; " + prop;
            opt = optionalName(startCp);
            if (opt.length() > 0) opt = "; " + opt;
            optCom = optionalComment(startCp);
            if (optCom.length() > 0) optCom += " ";
            String startName = getKenName(startCp);
            String line;
            String pgap = Utility.repeat(" ", minPropertyWidth() - prop.length() - opt.length());
            if (startCp != endCp) {
                String endName = getKenName(endCp);
                // Code points inside the span that were skipped rather than counted.
                int bridge = endCp - startCp + 1 - realCount;
                String count = (bridge == 0) ? "" + realCount : realCount + "/" + bridge;
                String countStr = Utility.repeat(" ", 3-count.length()) + "[" + count + "] ";
                String gap = Utility.repeat(" ", 12 - width(startCp) - width(endCp));
                line = Utility.hex(startCp,4) + ".." + Utility.hex(endCp,4) + gap
                        + prop + opt + pgap + commentSep + optCom
                        + countStr;
                if (startName.length() != 0 || endName.length() != 0) {
                    int com = 0;
                    if (COMPRESS_NAMES) com = commonInitialWords(startName, endName);
                    if (com == 0) {
                        line += startName + ".." + endName;
                    } else {
                        // Factor the shared leading words out of both names.
                        line += startName.substring(0,com)
                            + "(" + startName.substring(com) + ".." + endName.substring(com) + ")";
                    }
                }
            } else {
                String gap = alwaysBreaks
                    ? Utility.repeat(" ", 6 - width(startCp))
                    : Utility.repeat(" ", 14 - width(startCp));
                String gap2 = alwaysBreaks
                    ? " "
                    : "      ";
                line = Utility.hex(startCp,4) + gap
                        + prop + opt + pgap + commentSep + optCom + gap2
                        + startName;
            }
            if (commentOut) {
                line = "# " + line;
            }
            output.println(line);
            if (showOnConsole) System.out.println(line);
        } catch (Exception e) {
            throw new ChainException("Format error {0}, {1}",
                new Object[]{new Integer(startCp), new Integer(endCp)}, e);
        }
    }
    /**
     * @return number of hex digits used for the code point: 4, 5 or 6
     */
    int width(int cp) {
        return cp <= 0xFFFF ? 4
             : cp <= 0xFFFFF ? 5
             : 6;
    }
    /**
     * Returns the name to print for a code point; never null.
     * When useKenName is set and the UCD name starts with '&lt;', the name is
     * replaced by "&lt;control&gt;" for code points below 0xFF and dropped otherwise.
     * A missing (null) or empty UCD name yields the empty string.
     */
    String getKenName(int cp) {
        String result = ucdData.getName(cp);
        // Normalize a missing name first so no caller ever sees null.
        if (result == null) return "";
        if (!useKenName) return result;
        // Guard the charAt: an empty name has no first character to inspect.
        if (DROP_INDICATORS && result.length() > 0 && result.charAt(0) == '<') {
            if (cp < 0xFF) return "<control>";
            return "";
        }
        return result;
    }
    /**
     * @return the modified category of the code point from the UCD, requested
     * with CASED_LETTER_MASK when breakByCategory is set and with 0 otherwise
     */
    byte getModCat(int cp) {
        byte result = ucdData.getModCat(cp, breakByCategory ? CASED_LETTER_MASK : 0);
        return result;
    }
    /**
     * @return common initial substring length ending with SPACE or HYPHEN-MINUS. 0 if there is none
     */
    public static int commonInitialWords(String a, String b) {
        // Make a the shorter string so the loop index is valid for both.
        if (a.length() > b.length()) {
            String temp = a;
            a = b;
            b = temp;
        }
        int lastSpace = 0;
        for (int i = 0; i < a.length(); ++i) {
            char ca = a.charAt(i);
            char cb = b.charAt(i);
            if (ca != cb) return lastSpace;
            if (ca == ' ' || ca == '-') lastSpace = i + 1;
        }
        // a is a prefix of b: it counts in full if b ends there or continues with a separator.
        if (b.length() == a.length() || b.charAt(a.length()) == ' ' || b.charAt(a.length()) == '-') {
            lastSpace = a.length();
        }
        return lastSpace;
    }
    /**
     * Walks every code point from 0 to 0x10FFFF, groups them into runs according
     * to status() and the modified category, and writes each run with format().
     * Writes the header first (if any) and a "Total code points" trailer last;
     * when nothing was listed, a "No values" line is written and a notice is
     * printed to System.out.
     *
     * @return the number of code points whose status was INCLUDE or BREAK
     */
    public int print() {
        set.clear();
        int count = 0;
        firstRealCp = -1;
        byte firstRealCpCat = -1;
        lastRealCp = -1;
        int realRangeCount = 0;
        String header = headerString();
        if (header.length() != 0) {
            // System.out.println(header);
            output.println(header);
            output.println();
        }
        for (int cp = 0; cp <= 0x10FFFF; ++cp) {
            byte s = status(cp);
            if (alwaysBreaks && s == INCLUDE) s = BREAK;
            // A change of modified category inside an open run forces a new run.
            if (s == INCLUDE && firstRealCp != -1) {
                if (getModCat(cp) != firstRealCpCat) s = BREAK;
            }
            switch(s) {
              case CONTINUE:
                break; // do nothing
              case INCLUDE:
                if (firstRealCp == -1) {
                    firstRealCp = cp;
                    firstRealCpCat = getModCat(firstRealCp);
                }
                lastRealCp = cp;
                count++;
                realRangeCount++;
                break;
              case BREAK:
                if (firstRealCp != -1) {
                    format(firstRealCp, lastRealCp, realRangeCount);
                }
                lastRealCp = firstRealCp = cp;
                firstRealCpCat = getModCat(firstRealCp);
                realRangeCount = 1;
                count++;
                break;
              case EXCLUDE:
                if (firstRealCp != -1) {
                    format(firstRealCp, lastRealCp, realRangeCount);
                    firstRealCp = -1;
                    realRangeCount = 0;
                }
                break;
            }
        }
        // Flush the run that was still open at the end of the code space.
        if (firstRealCp != -1) {
            format(firstRealCp, lastRealCp, realRangeCount);
        }
        if (count == 0) {
            output.println("# No values for " + missingValueName());
            System.out.println("ZERO COUNT for " + missingValueName());
        }
        NumberFormat nf = NumberFormat.getInstance();
        nf.setMaximumFractionDigits(0);
        nf.setGroupingUsed(false);
        output.println();
        output.println("# Total code points: " + nf.format(count));
        output.println();
        //System.out.println(headerString());
        //System.out.println(set.toPattern(true));
        return count;
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasesHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasesHeader.txt
@ -1,49 +0,0 @@
 #
 # This file contains aliases for property values used in the UCD.
 # These names can be used for XML formats of UCD data, for regular-expression
 # property tests, and other programmatic textual descriptions of Unicode data.
 # For information on which properties are normative, see UCD.html.
 #
 # The names may be translated in appropriate environments, and additional
 # aliases may be useful.
 #
 # FORMAT
 #
 # Each line describes a property value name.
 # This consists of three or more fields, separated by semicolons.
 #
 # First Field: The first field describes the property for which that
 # property value name is used.
 #
 # Second Field: The second field is an abbreviated name.
 # If there is no abbreviated name available, the field is marked with "n/a".
 #
 # Third Field: The third field is a long name.
 #
 # In the case of ccc, there are 4 fields. The second field is numeric, third
 # is abbreviated, and fourth is long.
 #
 # The above are the preferred aliases. Other aliases may be listed in additional fields.
 #
 # Loose matching should be applied to all property names and property values, with
 # the exception of String Property values. With loose matching of property names and
 # values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
 # values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
 #
 # NOTE: Property value names are NOT unique across properties. For example:
 #
 #   AL means Arabic Letter for the Bidi_Class property, and
 #   AL means Alpha_Left for the Combining_Class property, and
 #   AL means Alphabetic for the Line_Break property.
 #
 # In addition, some property names may be the same as some property value names.
 # For example:
 #
 #   sc means the Script property, and
 #   Sc means the General_Category property value Currency_Symbol (Sc)
 #
 # The combination of property value and property name is, however, unique.
 #
 # For more information, see UTS #18: Regular Expression Guidelines
 # ================================================
--- a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
--- a/tools/unicodetools/com/ibm/text/UCD/ScriptExceptions.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ScriptExceptions.java
@ -1,266 +0,0 @@
 package com.ibm.text.UCD;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 public class ScriptExceptions {
  public static UnicodeSet getExceptions() {
    UnicodeSet contents = new UnicodeSet();
    //     "FAIL: " => "contents.add(0x"
    //      ";" => ");//"
    //      ".." => ", 0x"
 contents.add(0x005E);//           COMMON     # (Sk) CIRCUMFLEX ACCENT
 contents.add(0x0060);//           COMMON     # (Sk) GRAVE ACCENT
 contents.add(0x00A8);//           COMMON     # (Sk) DIAERESIS
 contents.add(0x00AF);//           COMMON     # (Sk) MACRON
 contents.add(0x00B4);//           COMMON     # (Sk) ACUTE ACCENT
 contents.add(0x00B8);//           COMMON     # (Sk) CEDILLA
 contents.add(0x02B9, 0x02BA);//     COMMON     # (Sk) MODIFIER LETTER PRIME, 0xMODIFIER LETTER DOUBLE PRIME
 contents.add(0x02C2, 0x02CF);//     COMMON     # (Sk) MODIFIER LETTER LEFT ARROWHEAD, 0xMODIFIER LETTER LOW ACUTE ACCENT
 contents.add(0x02D2, 0x02DF);//     COMMON     # (Sk) MODIFIER LETTER CENTRED RIGHT HALF RING, 0xMODIFIER LETTER CROSS ACCENT
 contents.add(0x02E5, 0x02ED);//     COMMON     # (Sk) MODIFIER LETTER EXTRA-HIGH TONE BAR, 0xMODIFIER LETTER UNASPIRATED
 contents.add(0x0374, 0x0375);//     COMMON     # (Sk) GREEK NUMERAL SIGN, 0xGREEK LOWER NUMERAL SIGN
 contents.add(0x0384, 0x0385);//     COMMON     # (Sk) GREEK TONOS, 0xGREEK DIALYTIKA TONOS
 contents.add(0x1FBD);//           COMMON     # (Sk) GREEK KORONIS
 contents.add(0x1FBF, 0x1FC1);//     COMMON     # (Sk) GREEK PSILI, 0xGREEK DIALYTIKA AND PERISPOMENI
 contents.add(0x1FCD, 0x1FCF);//     COMMON     # (Sk) GREEK PSILI AND VARIA, 0xGREEK PSILI AND PERISPOMENI
 contents.add(0x1FDD, 0x1FDF);//     COMMON     # (Sk) GREEK DASIA AND VARIA, 0xGREEK DASIA AND PERISPOMENI
 contents.add(0x1FED, 0x1FEF);//     COMMON     # (Sk) GREEK DIALYTIKA AND VARIA, 0xGREEK VARIA
 contents.add(0x1FFD, 0x1FFE);//     COMMON     # (Sk) GREEK OXIA, 0xGREEK DASIA
 contents.add(0x309B, 0x309C);//     COMMON     # (Sk) KATAKANA-HIRAGANA VOICED SOUND MARK, 0xKATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
 contents.add(0xFF3E);//           COMMON     # (Sk) FULLWIDTH CIRCUMFLEX ACCENT
 contents.add(0xFF40);//           COMMON     # (Sk) FULLWIDTH GRAVE ACCENT
 contents.add(0xFFE3);//           COMMON     # (Sk) FULLWIDTH MACRON
    contents.add(0x0640);//           COMMON     # (Lm) ARABIC TATWEEL
    contents.add(0x3006);//           COMMON     # (Lo) IDEOGRAPHIC CLOSING MARK
    contents.add(0x303C);//           COMMON     # (Lo) MASU MARK
    contents.add(0x2135, 0x2138);//     COMMON     # (Lo) ALEF SYMBOL..DALET SYMBOL
    contents.add(0x1714);//           TAGALOG    # (Mn) TAGALOG SIGN VIRAMA
    contents.add(0x1734);//           HANUNOO    # (Mn) HANUNOO SIGN PAMUDPOD
    //contents.add(0x0F3E, 0x0F3F);//     COMMON     # (Mc) TIBETAN SIGN YAR TSHES, 0xTIBETAN SIGN MAR TSHES
    contents.add(0x2071);//           COMMON     # (LC) SUPERSCRIPT LATIN SMALL LETTER I
    contents.add(0x2102);//           COMMON     # (LC) DOUBLE-STRUCK CAPITAL C
    contents.add(0x2107);//           COMMON     # (LC) EULER CONSTANT
    contents.add(0x210A, 0x2113);//     COMMON     # (LC) SCRIPT SMALL G, 0xSCRIPT SMALL L
    contents.add(0x2115);//           COMMON     # (LC) DOUBLE-STRUCK CAPITAL N
    contents.add(0x2119, 0x211D);//     COMMON     # (LC) DOUBLE-STRUCK CAPITAL P, 0xDOUBLE-STRUCK CAPITAL R
    contents.add(0x2124);//           COMMON     # (LC) DOUBLE-STRUCK CAPITAL Z
    contents.add(0x2128);//           COMMON     # (LC) BLACK-LETTER CAPITAL Z
    contents.add(0x212C, 0x212D);//     COMMON     # (LC) SCRIPT CAPITAL B, 0xBLACK-LETTER CAPITAL C
    contents.add(0x212F, 0x2131);//     COMMON     # (LC) SCRIPT SMALL E, 0xSCRIPT CAPITAL F
    contents.add(0x2133, 0x2134);//     COMMON     # (LC) SCRIPT CAPITAL M, 0xSCRIPT SMALL O
    contents.add(0x2139);//           COMMON     # (LC) INFORMATION SOURCE
    contents.add(0x213D, 0x213F);//     COMMON     # (LC) DOUBLE-STRUCK SMALL GAMMA, 0xDOUBLE-STRUCK CAPITAL PI
    contents.add(0x2145, 0x2149);//     COMMON     # (LC) DOUBLE-STRUCK ITALIC CAPITAL D, 0xDOUBLE-STRUCK ITALIC SMALL J
    contents.add(0x1D400, 0x1D454);//   COMMON     # (LC) MATHEMATICAL BOLD CAPITAL A, 0xMATHEMATICAL ITALIC SMALL G
    contents.add(0x1D456, 0x1D49C);//   COMMON     # (LC) MATHEMATICAL ITALIC SMALL I, 0xMATHEMATICAL SCRIPT CAPITAL A
    contents.add(0x1D49E, 0x1D49F);//   COMMON     # (LC) MATHEMATICAL SCRIPT CAPITAL C, 0xMATHEMATICAL SCRIPT CAPITAL D
    contents.add(0x1D4A2);//          COMMON     # (LC) MATHEMATICAL SCRIPT CAPITAL G
    contents.add(0x1D4A5, 0x1D4A6);//   COMMON     # (LC) MATHEMATICAL SCRIPT CAPITAL J, 0xMATHEMATICAL SCRIPT CAPITAL K
    contents.add(0x1D4A9, 0x1D4AC);//   COMMON     # (LC) MATHEMATICAL SCRIPT CAPITAL N, 0xMATHEMATICAL SCRIPT CAPITAL Q
    contents.add(0x1D4AE, 0x1D4B9);//   COMMON     # (LC) MATHEMATICAL SCRIPT CAPITAL S, 0xMATHEMATICAL SCRIPT SMALL D
    contents.add(0x1D4BB);//          COMMON     # (LC) MATHEMATICAL SCRIPT SMALL F
    contents.add(0x1D4BD, 0x1D4C0);//   COMMON     # (LC) MATHEMATICAL SCRIPT SMALL H, 0xMATHEMATICAL SCRIPT SMALL K
    contents.add(0x1D4C2, 0x1D4C3);//   COMMON     # (LC) MATHEMATICAL SCRIPT SMALL M, 0xMATHEMATICAL SCRIPT SMALL N
    contents.add(0x1D4C5, 0x1D505);//   COMMON     # (LC) MATHEMATICAL SCRIPT SMALL P, 0xMATHEMATICAL FRAKTUR CAPITAL B
    contents.add(0x1D507, 0x1D50A);//   COMMON     # (LC) MATHEMATICAL FRAKTUR CAPITAL D, 0xMATHEMATICAL FRAKTUR CAPITAL G
    contents.add(0x1D50D, 0x1D514);//   COMMON     # (LC) MATHEMATICAL FRAKTUR CAPITAL J, 0xMATHEMATICAL FRAKTUR CAPITAL Q
    contents.add(0x1D516, 0x1D51C);//   COMMON     # (LC) MATHEMATICAL FRAKTUR CAPITAL S, 0xMATHEMATICAL FRAKTUR CAPITAL Y
    contents.add(0x1D51E, 0x1D539);//   COMMON     # (LC) MATHEMATICAL FRAKTUR SMALL A, 0xMATHEMATICAL DOUBLE-STRUCK CAPITAL B
    contents.add(0x1D53B, 0x1D53E);//   COMMON     # (LC) MATHEMATICAL DOUBLE-STRUCK CAPITAL D, 0xMATHEMATICAL DOUBLE-STRUCK CAPITAL G
    contents.add(0x1D540, 0x1D544);//   COMMON     # (LC) MATHEMATICAL DOUBLE-STRUCK CAPITAL I, 0xMATHEMATICAL DOUBLE-STRUCK CAPITAL M
    contents.add(0x1D546);//          COMMON     # (LC) MATHEMATICAL DOUBLE-STRUCK CAPITAL O
    contents.add(0x1D54A, 0x1D550);//   COMMON     # (LC) MATHEMATICAL DOUBLE-STRUCK CAPITAL S, 0xMATHEMATICAL DOUBLE-STRUCK CAPITAL Y
    contents.add(0x1D552, 0x1D6A3);//   COMMON     # (LC) MATHEMATICAL DOUBLE-STRUCK SMALL A, 0xMATHEMATICAL MONOSPACE SMALL Z
    contents.add(0x1D6A8, 0x1D6C0);//   COMMON     # (LC) MATHEMATICAL BOLD CAPITAL ALPHA, 0xMATHEMATICAL BOLD CAPITAL OMEGA
    contents.add(0x1D6C2, 0x1D6DA);//   COMMON     # (LC) MATHEMATICAL BOLD SMALL ALPHA, 0xMATHEMATICAL BOLD SMALL OMEGA
    contents.add(0x1D6DC, 0x1D6FA);//   COMMON     # (LC) MATHEMATICAL BOLD EPSILON SYMBOL, 0xMATHEMATICAL ITALIC CAPITAL OMEGA
    contents.add(0x1D6FC, 0x1D714);//   COMMON     # (LC) MATHEMATICAL ITALIC SMALL ALPHA, 0xMATHEMATICAL ITALIC SMALL OMEGA
    contents.add(0x1D716, 0x1D734);//   COMMON     # (LC) MATHEMATICAL ITALIC EPSILON SYMBOL, 0xMATHEMATICAL BOLD ITALIC CAPITAL OMEGA
    contents.add(0x1D736, 0x1D74E);//   COMMON     # (LC) MATHEMATICAL BOLD ITALIC SMALL ALPHA, 0xMATHEMATICAL BOLD ITALIC SMALL OMEGA
    contents.add(0x1D750, 0x1D76E);//   COMMON     # (LC) MATHEMATICAL BOLD ITALIC EPSILON SYMBOL, 0xMATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
    contents.add(0x1D770, 0x1D788);//   COMMON     # (LC) MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA, 0xMATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
    contents.add(0x1D78A, 0x1D7A8);//   COMMON     # (LC) MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL, 0xMATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
    contents.add(0x1D7AA, 0x1D7C2);//   COMMON     # (LC) MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA, 0xMATHEMATICAL SANS-SERIF BOLD IT    ALIC SMALL OMEGA
    contents.add(0x1D7C4, 0x1D7C9);//   COMMON     # (LC) MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL, 0xMATHEMATICAL SANS-SERIF BOLD    ITALIC PI SYMBOL
    contents.add(0x02BB, 0x02C1);//     COMMON     # (0xLm) MODIFIER LETTER TURNED COMMA, 0xMODIFIER LETTER REVERSED GLOTTAL STOP
    contents.add(0x02D0, 0x02D1);//     COMMON     # (0xLm) MODIFIER LETTER TRIANGULAR COLON, 0xMODIFIER LETTER HALF TRIANGULAR COLON
    contents.add(0x02EE);//           COMMON     # (0xLm) MODIFIER LETTER DOUBLE APOSTROPHE
    contents.add(0x3031, 0x3035);//     COMMON     # (0xLm) VERTICAL KANA REPEAT MARK, 0xVERTICAL KANA REPEAT MARK LOWER HALF
    contents.add(0x30FC);//           COMMON     # (0xLm) KATAKANA-HIRAGANA PROLONGED SOUND MARK
    contents.add(0xFF70);//           COMMON     # (0xLm) HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
    contents.add(0xFF9E, 0xFF9F);//     COMMON     # (0xLm) HALFWIDTH KATAKANA VOICED SOUND MARK, 0xHALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
    contents.add(0x0483, 0x0486);//     CYRILLIC   # (0xMn) COMBINING CYRILLIC TITLO, 0xCOMBINING CYRILLIC PSILI PNEUMATA
    contents.add(0x0711);//           SYRIAC     # (0xMn) SYRIAC LETTER SUPERSCRIPT ALAPH
    contents.add(0x0730, 0x074A);//     SYRIAC     # (0xMn) SYRIAC PTHAHA ABOVE, 0xSYRIAC BARREKH
    contents.add(0x07A6, 0x07B0);//     THAANA     # (0xMn) THAANA ABAFILI, 0xTHAANA SUKUN
    contents.add(0x0901, 0x0902);//     DEVANAGARI # (0xMn) DEVANAGARI SIGN CANDRABINDU, 0xDEVANAGARI SIGN ANUSVARA
    contents.add(0x093C);//           DEVANAGARI # (0xMn) DEVANAGARI SIGN NUKTA
    contents.add(0x0941, 0x0948);//     DEVANAGARI # (0xMn) DEVANAGARI VOWEL SIGN U, 0xDEVANAGARI VOWEL SIGN AI
    contents.add(0x094D);//           DEVANAGARI # (0xMn) DEVANAGARI SIGN VIRAMA
    contents.add(0x0951, 0x0954);//     DEVANAGARI # (0xMn) DEVANAGARI STRESS SIGN UDATTA, 0xDEVANAGARI ACUTE ACCENT
    contents.add(0x0962, 0x0963);//     DEVANAGARI # (0xMn) DEVANAGARI VOWEL SIGN VOCALIC L, 0xDEVANAGARI VOWEL SIGN VOCALIC LL
    contents.add(0x0981);//           BENGALI    # (0xMn) BENGALI SIGN CANDRABINDU
    contents.add(0x09BC);//           BENGALI    # (0xMn) BENGALI SIGN NUKTA
    contents.add(0x09C1, 0x09C4);//     BENGALI    # (0xMn) BENGALI VOWEL SIGN U, 0xBENGALI VOWEL SIGN VOCALIC RR
    contents.add(0x09CD);//           BENGALI    # (0xMn) BENGALI SIGN VIRAMA
    contents.add(0x09E2, 0x09E3);//     BENGALI    # (0xMn) BENGALI VOWEL SIGN VOCALIC L, 0xBENGALI VOWEL SIGN VOCALIC LL
    contents.add(0x0A02);//           GURMUKHI   # (0xMn) GURMUKHI SIGN BINDI
    contents.add(0x0A3C);//           GURMUKHI   # (0xMn) GURMUKHI SIGN NUKTA
    contents.add(0x0A41, 0x0A42);//     GURMUKHI   # (0xMn) GURMUKHI VOWEL SIGN U, 0xGURMUKHI VOWEL SIGN UU
    contents.add(0x0A47, 0x0A48);//     GURMUKHI   # (0xMn) GURMUKHI VOWEL SIGN EE, 0xGURMUKHI VOWEL SIGN AI
    contents.add(0x0A4B, 0x0A4D);//     GURMUKHI   # (0xMn) GURMUKHI VOWEL SIGN OO, 0xGURMUKHI SIGN VIRAMA
    contents.add(0x0A70, 0x0A71);//     GURMUKHI   # (0xMn) GURMUKHI TIPPI, 0xGURMUKHI ADDAK
    contents.add(0x0A81, 0x0A82);//     GUJARATI   # (0xMn) GUJARATI SIGN CANDRABINDU, 0xGUJARATI SIGN ANUSVARA
    contents.add(0x0ABC);//           GUJARATI   # (0xMn) GUJARATI SIGN NUKTA
    contents.add(0x0AC1, 0x0AC5);//     GUJARATI   # (0xMn) GUJARATI VOWEL SIGN U, 0xGUJARATI VOWEL SIGN CANDRA E
    contents.add(0x0AC7, 0x0AC8);//     GUJARATI   # (0xMn) GUJARATI VOWEL SIGN E, 0xGUJARATI VOWEL SIGN AI
    contents.add(0x0ACD);//           GUJARATI   # (0xMn) GUJARATI SIGN VIRAMA
    contents.add(0x0B01);//           ORIYA      # (0xMn) ORIYA SIGN CANDRABINDU
    contents.add(0x0B3C);//           ORIYA      # (0xMn) ORIYA SIGN NUKTA
    contents.add(0x0B3F);//           ORIYA      # (0xMn) ORIYA VOWEL SIGN I
    contents.add(0x0B41, 0x0B43);//     ORIYA      # (0xMn) ORIYA VOWEL SIGN U, 0xORIYA VOWEL SIGN VOCALIC R
    contents.add(0x0B4D);//           ORIYA      # (0xMn) ORIYA SIGN VIRAMA
    contents.add(0x0B56);//           ORIYA      # (0xMn) ORIYA AI LENGTH MARK
    contents.add(0x0B82);//           TAMIL      # (0xMn) TAMIL SIGN ANUSVARA
    contents.add(0x0BC0);//           TAMIL      # (0xMn) TAMIL VOWEL SIGN II
    contents.add(0x0BCD);//           TAMIL      # (0xMn) TAMIL SIGN VIRAMA
    contents.add(0x0C3E, 0x0C40);//     TELUGU     # (0xMn) TELUGU VOWEL SIGN AA, 0xTELUGU VOWEL SIGN II
    contents.add(0x0C46, 0x0C48);//     TELUGU     # (0xMn) TELUGU VOWEL SIGN E, 0xTELUGU VOWEL SIGN AI
    contents.add(0x0C4A, 0x0C4D);//     TELUGU     # (0xMn) TELUGU VOWEL SIGN O, 0xTELUGU SIGN VIRAMA
    contents.add(0x0C55, 0x0C56);//     TELUGU     # (0xMn) TELUGU LENGTH MARK, 0xTELUGU AI LENGTH MARK
    contents.add(0x0CBF);//           KANNADA    # (0xMn) KANNADA VOWEL SIGN I
    contents.add(0x0CC6);//           KANNADA    # (0xMn) KANNADA VOWEL SIGN E
    contents.add(0x0CCC, 0x0CCD);//     KANNADA    # (0xMn) KANNADA VOWEL SIGN AU, 0xKANNADA SIGN VIRAMA
    contents.add(0x0D41, 0x0D43);//     MALAYALAM  # (0xMn) MALAYALAM VOWEL SIGN U, 0xMALAYALAM VOWEL SIGN VOCALIC R
    contents.add(0x0D4D);//           MALAYALAM  # (0xMn) MALAYALAM SIGN VIRAMA
    contents.add(0x0DCA);//           SINHALA    # (0xMn) SINHALA SIGN AL-LAKUNA
    contents.add(0x0DD2, 0x0DD4);//     SINHALA    # (0xMn) SINHALA VOWEL SIGN KETTI IS-PILLA, 0xSINHALA VOWEL SIGN KETTI PAA-PILLA
    contents.add(0x0DD6);//           SINHALA    # (0xMn) SINHALA VOWEL SIGN DIGA PAA-PILLA
    contents.add(0x0E31);//           THAI       # (0xMn) THAI CHARACTER MAI HAN-AKAT
    contents.add(0x0E34, 0x0E3A);//     THAI       # (0xMn) THAI CHARACTER SARA I, 0xTHAI CHARACTER PHINTHU
    contents.add(0x0E47, 0x0E4E);//     THAI       # (0xMn) THAI CHARACTER MAITAIKHU, 0xTHAI CHARACTER YAMAKKAN
    contents.add(0x0EB1);//           LAO        # (0xMn) LAO VOWEL SIGN MAI KAN
    contents.add(0x0EB4, 0x0EB9);//     LAO        # (0xMn) LAO VOWEL SIGN I, 0xLAO VOWEL SIGN UU
    contents.add(0x0EBB, 0x0EBC);//     LAO        # (0xMn) LAO VOWEL SIGN MAI KON, 0xLAO SEMIVOWEL SIGN LO
    contents.add(0x0EC8, 0x0ECD);//     LAO        # (0xMn) LAO TONE MAI EK, 0xLAO NIGGAHITA
    contents.add(0x0F18, 0x0F19);//     TIBETAN    # (0xMn) TIBETAN ASTROLOGICAL SIGN -KHYUD PA, 0xTIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
    contents.add(0x0F35);//           TIBETAN    # (0xMn) TIBETAN MARK NGAS BZUNG NYI ZLA
    contents.add(0x0F37);//           TIBETAN    # (0xMn) TIBETAN MARK NGAS BZUNG SGOR RTAGS
    contents.add(0x0F39);//           TIBETAN    # (0xMn) TIBETAN MARK TSA -PHRU
    contents.add(0x0F71, 0x0F7E);//     TIBETAN    # (0xMn) TIBETAN VOWEL SIGN AA, 0xTIBETAN SIGN RJES SU NGA RO
    contents.add(0x0F80, 0x0F84);//     TIBETAN    # (0xMn) TIBETAN VOWEL SIGN REVERSED I, 0xTIBETAN MARK HALANTA
    contents.add(0x0F86, 0x0F87);//     TIBETAN    # (0xMn) TIBETAN SIGN LCI RTAGS, 0xTIBETAN SIGN YANG RTAGS
    contents.add(0x0F90, 0x0F97);//     TIBETAN    # (0xMn) TIBETAN SUBJOINED LETTER KA, 0xTIBETAN SUBJOINED LETTER JA
    contents.add(0x0F99, 0x0FBC);//     TIBETAN    # (0xMn) TIBETAN SUBJOINED LETTER NYA, 0xTIBETAN SUBJOINED LETTER FIXED-FORM RA
    contents.add(0x0FC6);//           TIBETAN    # (0xMn) TIBETAN SYMBOL PADMA GDAN
    contents.add(0x102D, 0x1030);//     MYANMAR    # (0xMn) MYANMAR VOWEL SIGN I, 0xMYANMAR VOWEL SIGN UU
    contents.add(0x1032);//           MYANMAR    # (0xMn) MYANMAR VOWEL SIGN AI
    contents.add(0x1036, 0x1037);//     MYANMAR    # (0xMn) MYANMAR SIGN ANUSVARA, 0xMYANMAR SIGN DOT BELOW
    contents.add(0x1039);//           MYANMAR    # (0xMn) MYANMAR SIGN VIRAMA
    contents.add(0x1058, 0x1059);//     MYANMAR    # (0xMn) MYANMAR VOWEL SIGN VOCALIC L, 0xMYANMAR VOWEL SIGN VOCALIC LL
    contents.add(0x17B7, 0x17BD);//     KHMER      # (0xMn) KHMER VOWEL SIGN I, 0xKHMER VOWEL SIGN UA
    contents.add(0x17C6);//           KHMER      # (0xMn) KHMER SIGN NIKAHIT
    contents.add(0x17C9, 0x17D3);//     KHMER      # (0xMn) KHMER SIGN MUUSIKATOAN, 0xKHMER SIGN BATHAMASAT
    contents.add(0x18A9);//           MONGOLIAN  # (0xMn) MONGOLIAN LETTER ALI GALI DAGALGA
    contents.add(0x1712, 0x1713);//     TAGALOG    # (0xMn) TAGALOG VOWEL SIGN I, 0xTAGALOG VOWEL SIGN U
    contents.add(0x1732, 0x1733);//     HANUNOO    # (0xMn) HANUNOO VOWEL SIGN I, 0xHANUNOO VOWEL SIGN U
    contents.add(0x1752, 0x1753);//     BUHID      # (0xMn) BUHID VOWEL SIGN I, 0xBUHID VOWEL SIGN U
    contents.add(0x1772, 0x1773);//     TAGBANWA   # (0xMn) TAGBANWA VOWEL SIGN I, 0xTAGBANWA VOWEL SIGN U
    //contents.add(0x1D165, 0x1D166);//   COMMON     # (0xMc) MUSICAL SYMBOL COMBINING STEM, 0xMUSICAL SYMBOL COMBINING SPRECHGESANG STEM
    //contents.add(0x1D16D, 0x1D172);//   COMMON     # (0xMc) MUSICAL SYMBOL COMBINING AUGMENTATION DOT, 0xMUSICAL SYMBOL COMBINING FLAG-5
    contents.add(0x0966, 0x096F);//     DEVANAGARI # (0xNd) DEVANAGARI DIGIT ZERO, 0xDEVANAGARI DIGIT NINE
    contents.add(0x09E6, 0x09EF);//     BENGALI    # (0xNd) BENGALI DIGIT ZERO, 0xBENGALI DIGIT NINE
    contents.add(0x0A66, 0x0A6F);//     GURMUKHI   # (0xNd) GURMUKHI DIGIT ZERO, 0xGURMUKHI DIGIT NINE
    contents.add(0x0AE6, 0x0AEF);//     GUJARATI   # (0xNd) GUJARATI DIGIT ZERO, 0xGUJARATI DIGIT NINE
    contents.add(0x0B66, 0x0B6F);//     ORIYA      # (0xNd) ORIYA DIGIT ZERO, 0xORIYA DIGIT NINE
    contents.add(0x0BE7, 0x0BEF);//     TAMIL      # (0xNd) TAMIL DIGIT ONE, 0xTAMIL DIGIT NINE
    contents.add(0x0C66, 0x0C6F);//     TELUGU     # (0xNd) TELUGU DIGIT ZERO, 0xTELUGU DIGIT NINE
    contents.add(0x0CE6, 0x0CEF);//     KANNADA    # (0xNd) KANNADA DIGIT ZERO, 0xKANNADA DIGIT NINE
    contents.add(0x0D66, 0x0D6F);//     MALAYALAM  # (0xNd) MALAYALAM DIGIT ZERO, 0xMALAYALAM DIGIT NINE
    contents.add(0x0E50, 0x0E59);//     THAI       # (0xNd) THAI DIGIT ZERO, 0xTHAI DIGIT NINE
    contents.add(0x0ED0, 0x0ED9);//     LAO        # (0xNd) LAO DIGIT ZERO, 0xLAO DIGIT NINE
    contents.add(0x0F20, 0x0F29);//     TIBETAN    # (0xNd) TIBETAN DIGIT ZERO, 0xTIBETAN DIGIT NINE
    contents.add(0x1040, 0x1049);//     MYANMAR    # (0xNd) MYANMAR DIGIT ZERO, 0xMYANMAR DIGIT NINE
    contents.add(0x1369, 0x1371);//     ETHIOPIC   # (0xNd) ETHIOPIC DIGIT ONE, 0xETHIOPIC DIGIT NINE
    contents.add(0x17E0, 0x17E9);//     KHMER      # (0xNd) KHMER DIGIT ZERO, 0xKHMER DIGIT NINE
    contents.add(0x1810, 0x1819);//     MONGOLIAN  # (0xNd) MONGOLIAN DIGIT ZERO, 0xMONGOLIAN DIGIT NINE
    contents.add(0x16EE, 0x16F0);//     RUNIC      # (0xNl) RUNIC ARLAUG SYMBOL, 0xRUNIC BELGTHOR SYMBOL
    contents.add(0x3007);//           HAN        # (0xNl) IDEOGRAPHIC NUMBER ZERO
    contents.add(0x3021, 0x3029);//     HAN        # (0xNl) HANGZHOU NUMERAL ONE, 0xHANGZHOU NUMERAL NINE
    contents.add(0x3038, 0x303A);//     HAN        # (0xNl) HANGZHOU NUMERAL TEN, 0xHANGZHOU NUMERAL THIRTY
    contents.add(0x1034A);//          GOTHIC     # (0xNl) GOTHIC LETTER NINE HUNDRED
    contents.add(0x0BF0, 0x0BF2);//     TAMIL      # (0xNo) TAMIL NUMBER TEN, 0xTAMIL NUMBER ONE THOUSAND
    contents.add(0x0F2A, 0x0F33);//     TIBETAN    # (0xNo) TIBETAN DIGIT HALF ONE, 0xTIBETAN DIGIT HALF ZERO
    contents.add(0x1372, 0x137C);//     ETHIOPIC   # (0xNo) ETHIOPIC NUMBER TEN, 0xETHIOPIC NUMBER TEN THOUSAND
    contents.add(0x2E80, 0x2E99);//     HAN        # (0xSo) CJK RADICAL REPEAT, 0xCJK RADICAL RAP
    contents.add(0x2E9B, 0x2EF3);//     HAN        # (0xSo) CJK RADICAL CHOKE, 0xCJK RADICAL C-SIMPLIFIED TURTLE
    contents.add(0x2F00, 0x2FD5);//     HAN        # (0xSo) KANGXI RADICAL ONE, 0xKANGXI RADICAL FLUTE
    contents.add(0xA490, 0xA4A1);//     YI         # (0xSo) YI RADICAL QOT, 0xYI RADICAL GA
    contents.add(0xA4A4, 0xA4B3);//     YI         # (0xSo) YI RADICAL DDUR, 0xYI RADICAL JO
    contents.add(0xA4B5, 0xA4C0);//     YI         # (0xSo) YI RADICAL JJY, 0xYI RADICAL SHAT
    contents.add(0xA4C2, 0xA4C4);//     YI         # (0xSo) YI RADICAL SHOP, 0xYI RADICAL ZZIET
    contents.add(0xA4C6);//           YI         # (0xSo) YI RADICAL KE
    return contents;
  }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/ScriptTimeline.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ScriptTimeline.java
@ -1,25 +0,0 @@
 package com.ibm.text.UCD;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.text.UnicodeSet;
 import java.util.List;
 /**
  * Command-line report: for every script code below UScript.CODE_LIMIT whose
  * script set is non-empty, prints one line holding the script name followed
  * by one tab-separated count per entry of the version list. Each count is the
  * size of the intersection of the script's set with the "age" set for that
  * version.
  */
 public class ScriptTimeline {
  public static void main(String[] args) {
    String[] versions = { "2.0.0", "2.1.2", "3.0.0", "3.1.0", "3.2.0", "4.0.0", "4.1.0", "5.0.0" };
    // The age sets do not depend on the script, so build each of them once
    // here instead of once per script inside the loop below.
    UnicodeSet[] ages = new UnicodeSet[versions.length];
    for (int v = 0; v < versions.length; ++v) {
      ages[v] = new UnicodeSet().applyPropertyAlias("age", versions[v]);
    }
    for (int s = 0; s < UScript.CODE_LIMIT; ++s) {
      String scriptName = UScript.getName(s);
      UnicodeSet chars = new UnicodeSet().applyPropertyAlias("script", scriptName);
      // scripts without any characters get no output line
      if (chars.size() == 0) continue;
      System.out.print(scriptName);
      for (int v = 0; v < ages.length; ++v) {
        // intersect on a copy so that neither chars nor the cached age set is modified
        System.out.print("\t" + new UnicodeSet(chars).retainAll(ages[v]).size());
      }
      System.out.println();
    }
  }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
@ -1,75 +0,0 @@
 # ================================================================================
 # Conditional mappings
 # ================================================================================
 # Special case for final form of sigma
 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
 # Note: the following cases for non-final are already in the UnicodeData file.
 # 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
 # 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
 # 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
 # Note: the following cases are not included, since they would case-fold in lowercasing
 # 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA
 # 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
 # ================================================================================
 # Locale-sensitive mappings
 # ================================================================================
 # Lithuanian
 # Lithuanian retains the dot in a lowercase i when followed by accents.
 # Remove DOT ABOVE after "i" with upper or titlecase
 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
 # Introduce an explicit dot above when lowercasing capital I's and J's
 # whenever there are more accents above.
 # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
 # ================================================================================
 # Turkish and Azeri
 # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
 # The following rules handle those cases.
 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
 0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
 # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
 # This matches the behavior of the canonically equivalent I-dot_above
 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
 # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
 # When uppercasing, i turns into a dotted capital I
 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
 # Note: the following case is already in the UnicodeData file.
 # 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
 # EOF
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt
@ -1,46 +0,0 @@
 #
 # Special Casing Properties
 #
 # This file is a supplement to the UnicodeData file.
 # It contains additional information about the casing of Unicode characters.
 # (For compatibility, the UnicodeData.txt file only contains case mappings for
 # characters where they are 1-1, and does not have locale-specific mappings.)
 # For more information, see the discussion of Case Mappings in the Unicode Standard.
 #
 # All code points not listed in this file that do not have a simple case mapping
 # in UnicodeData.txt map to themselves.
 # ================================================================================
 # Format
 # ================================================================================
 # The entries in this file are in the following machine-readable format:
 #
 # <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)? # <comment>
 #
 # <code>, <lower>, <title>, and <upper> provide character values in hex. If there is more
 # than one character, they are separated by spaces. Other than as used to separate 
 # elements, spaces are to be ignored.
 #
 # The <condition_list> is optional. Where present, it consists of one or more locale IDs
 # or contexts, separated by spaces. In these conditions:
 # - A condition list overrides the normal behavior if all of the listed conditions are true.
 # - The context is always the context of the characters in the original string,
 #   NOT in the resulting string.
 # - Case distinctions in the condition list are not significant.
 # - Conditions preceded by "Not_" represent the negation of the condition.
 #
 # A locale ID is defined by taking any language tag as defined by
 # RFC 3066 (or its successor), and replacing '-' by '_'.
 #
 # A context for a character C is defined by Section 3.13 Default Case 
 # Operations, of The Unicode Standard, Version 5.0.
 # (This is identical to the context defined by Unicode 4.1.0,
 #  as specified in http://www.unicode.org/versions/Unicode4.1.0/)
 #
 # Parsers of this file must be prepared to deal with future additions to this format:
 #  * Additional contexts
 #  * Additional fields
 # ================================================================================
 # ================================================================================
 # Unconditional mappings
 # ================================================================================
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingIota.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingIota.txt
@ -1,13 +0,0 @@
 # IMPORTANT-when capitalizing iota-subscript (0345)
 #  It MUST be in normalized form--moved to the end of any sequence of combining marks.
 #  This is because logically it represents a following base character!
 #  E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript>
 # It should never be the first character in a word, so in titlecasing it can be left as is.
 # The following cases are already in the UnicodeData file, so are only commented here.
 # 0345; 0345; 0345; 0399; # COMBINING GREEK YPOGEGRAMMENI
 # All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
 # have special uppercases.
 # Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
--- a/tools/unicodetools/com/ibm/text/UCD/StandardizedVariants-Template.html
+++ b/tools/unicodetools/com/ibm/text/UCD/StandardizedVariants-Template.html
@ -1,108 +0,0 @@
 <!doctype HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
 <meta name="GENERATOR" content="Microsoft FrontPage 5.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <meta name="keywords" content="unicode, variant glyphs">
 <meta name="description" content="Describes and displays standardized variant glyphs">
 <title>Standardized Variants</title>
 <link rel="stylesheet" type="text/css" href="http://www.unicode.org/reports/reports.css">
 </head>
 <body bgcolor="#ffffff">
 <table class="header">
  <tr>
    <td class="icon"><a href="http://www.unicode.org">
    <img align="middle" alt="[Unicode]" border="0" src="http://www.unicode.org/webscripts/logo60s2.gif" width="34" height="33"></a>&nbsp;&nbsp;<a class="bar" href="http://www.unicode.org/ucd">Unicode 
    Character Database</a></td>
  </tr>
  <tr>
    <td class="gray">&nbsp;</td>
  </tr>
 </table>
 <blockquote>
  <h1>Standardized Variants</h1>
  <table class="wide">
    <tr>
      <td valign="top" width="144">Revision</td>
      <td valign="top">@revision@</td>
    </tr>
    <tr>
      <td valign="top" width="144">Authors</td>
      <td valign="top">Members of the Editorial Committee</td>
    </tr>
    <tr>
      <td valign="top" width="144">Date</td>
      <td valign="top">@date@</td>
    </tr>
    <tr>
      <td valign="top" width="144">This Version</td>
      <td valign="top"><a href="http://www.unicode.org/Public/@updateDirectory@/@filename@.html">
      http://www.unicode.org/Public/@updateDirectory@/@filename@.html</a></td>
    </tr>
    <tr>
      <td valign="top" width="144">Previous Version</td>
      <td valign="top"><a href="http://www.unicode.org/Public/4.1.0/ucd/StandardizedVariants.html">
      http://www.unicode.org/Public/4.1.0/ucd/StandardizedVariants.html</a></td>
    </tr>
    <tr>
      <td valign="top" width="144">Latest Version</td>
      <td valign="top"><a href="http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html">
      http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html</a></td>
    </tr>
  </table>
  <h3><br>
  <i>Summary</i></h3>
  <blockquote>
    <p>This file provides a visual display of the standard variant sequences derived from 
    StandardizedVariants.txt.</p>
  </blockquote>
  <h3><i>Status</i></h3>
  <blockquote>
    <p><i>This file and the files described herein are part of the Unicode Character Database and 
    are governed by the terms of use at <a href="http://www.unicode.org/terms_of_use.html">
    http://www.unicode.org/terms_of_use.html</a>.</i></p>
  </blockquote>
  <hr width="50%">
  <h2>Introduction</h2>
  <p>The tables here <i>exhaustively</i> list the valid, registered combinations of base character 
  plus variation indicator. All combinations not listed in StandardizedVariants.txt are unspecified 
  and are reserved for future standardization; no conformant process may interpret them as 
  standardized variants. Variation selectors and their use are described in The Unicode Standard.</p>
  <p>These mathematical variants are all produced with the addition of Variation Selector 1 (VS1 or 
  U+FE00) to mathematical operator base characters. There is no variation according to context. The 
  Mongolian variants use the Mongolian Variant Selectors, and may vary according to context. That 
  is, if a contextual shape is not listed below, then the variation sequence has an unmodified 
  appearance. At this time no Han variants exist.</p>
  <blockquote>
    <p><a name="fonts"><b>Note: </b></a>The glyphs used to show the variations are often derived 
    from different physical fonts than the representative glyphs in the standard. They may therefore 
    exhibit minor differences in size, proportion, or weight <i>unrelated</i> to the intentional 
    difference in feature that is the defining element of the variation. Such minor differences 
    should be ignored. Likewise, in some cases the existing representative fonts may not yet contain 
    newly encoded characters and hence some representative glyphs shown in these tables may have a 
    slightly different style than others.</p>
  </blockquote>
  <p>@table@</p>
  <hr width="50%">
  <div align="center">
    <center>
    <table cellspacing="0" cellpadding="0" border="0">
      <tr>
        <td><a href="http://www.unicode.org/unicode/copyright.html">
        <img src="http://www.unicode.org/img/hb_notice.gif" border="0" alt="Access to Copyright and terms of use" width="216" height="50"></a></td>
      </tr>
    </table>
    <script language="Javascript" type="text/javascript" src="http://www.unicode.org/webscripts/lastModified.js">
    </script>
    </center>
  </div>
 </blockquote>
 </body>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCD/TernaryStore.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TernaryStore.java
@ -1,566 +0,0 @@
 package com.ibm.text.UCD;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.text.utility.*;
 import java.util.*;
 import java.io.*;
 // Enumerated properties will be IntCodePointProperty.
 // The string values they return will be the property value names.
 // Binary properties are Enumerated properties. They return 0 or 1
 public final class TernaryStore {
    static final int DONE = Integer.MIN_VALUE;
    static final int NOT_FOUND = Integer.MIN_VALUE+1;
    // for testing
    static DepthPrinter dp;
    static void test() throws java.io.IOException {
        PrintWriter pw = Utility.openPrintWriter("TestTernary.txt", Utility.LATIN1_WINDOWS);
        try {
            dp = new DepthPrinter(pw);
            String[] tests = {"the", "quick", "fish", "fisherman", "fishes", 
                "brown", "brow", "bracket", "bright", "brat",
                "brough", "dogs", "upper", "zebra",
                "fisher"};
            test("Simple: ", tests, tests.length);
            tests = new String[300000];
            int counter = 0;
            int i;
            for (i = 0; counter < tests.length && i <= 0x10FFFF; ++i) {
                if (Default.ucd().hasComputableName(i)) continue;
                String temp = UCharacter.getName(i);
                if (temp != null) tests[counter++] = temp.trim();
            }
            System.out.println("max-cp: " + Utility.hex(i));
            test("Unicode Names: ", tests, counter);
            //if (true) return;
            BufferedReader br = Utility.openReadFile(UCD_Types.BASE_DIR + "dict\\DiploFreq.txt", Utility.LATIN1);
            String line;
            counter = 0;
            while (counter < tests.length) {
                line = Utility.readDataLine(br);
                if (line == null) break;
                if (line.length() == 0) continue;
                Utility.dot(counter);
                int tabPos = line.indexOf('\t');
                if (tabPos < 0) {
                    System.out.println("???" + line);
                    continue;
                }
                tests[counter++] = line.substring(tabPos+1);
            }
            test("French: ", tests, counter);
        } finally {
            pw.close();
        }
    }
    static void test(String title, String[] tests, int len) {
        System.out.println();
        System.out.println(title);
        dp.println();
        dp.print(title, 0);
        dp.println();
        TernaryStore.Builder builder = new TernaryStore.Builder();
        int charCount = 0;
        for (int i = 0; i < len; ++i) {
            builder.add(tests[i], i);
            charCount += tests[i].length();
        }
        System.out.println("charCount: " + charCount);
        TernaryStore store = builder.build();
        store.showNodes();
        store.checkNodes();
        dp.println("Storage");
        dp.println(store.stringStore.toString());
        System.out.println("StorageSize: " + store.stringStore.toString().length());
        Matcher matcher = store.getMatcher();
        for (int i = 0; i < len; ++i) {
            int check = test(tests[i], matcher);
            if (check != i) {
                System.out.println("\tFail, result: " + tests[i] + ", " + check);
            }
        }
    }
    static int test(String s, Matcher matcher) {
        matcher.reset(s, 0);
        int lastResult = -1;
        for (int result = matcher.next(); result != DONE; result = matcher.next()) {
            lastResult = result;
        }
        return lastResult;
    }
    static final class Node {
        String getString(StringStore stringStore) {
            if (stringCode < 0) return tempString;
            return stringStore.get(stringCode);
        }
        void setString(String s) {
            tempString = s;
        }
        String tempString;
        int stringCode = -1;
        Node less;
        Node greater;
        Node next;
        int result = NOT_FOUND;
        public String toString(StringStore store) {
            return getString(store)
                + (result != NOT_FOUND ? "(" + result + ")" : "")
                + (next != null ? next.toString() : "");
        }
    }
    Node base;
    StringStore stringStore = new StringStore();
    final static class Matcher {
        TernaryStore store;
        String s;
        int position;
        Node lastNode;
        void reset(String s, int position) {
            this.s = s;
            this.position = position;
            this.lastNode = store.base;
        }
        // returns the next result
        // or DONE when done
        // sets position to point after end of found string
        int next() {
            while (lastNode != null && position < s.length()) {
                char ch = s.charAt(position++);
                do {
                    String nodeString = lastNode.getString(store.stringStore);
                    char first = nodeString.charAt(0);
                    if (ch == first) {
                        // now check the rest of the string
                        for (int i = 1; i < nodeString.length(); ++i) {
                            char other = nodeString.charAt(i);
                            if (other != s.charAt(position++)) {
                                return DONE;
                            }
                        }
                        // if we succeed, return result if there is one
                        int result = lastNode.result;
                        lastNode = lastNode.next;
                        if (result != NOT_FOUND) return result;
                        break; // get next char
                    }
                    // otherwise branch sideways, keeping same char
                    if (ch > first) {
                        lastNode = lastNode.greater;
                    } else {
                        lastNode = lastNode.less;
                    }
                } while (lastNode != null);
            }
            return DONE;
        }
    }
    public Matcher getMatcher() {
        Matcher result = new Matcher();
        result.store = this;
        return result;
    }
    public void showNodes() {
        showNodes2(base, "", 5);
    }
    public void showNodes2(Node n, String path, int depth) {
        if (n.less != null) {
            showNodes2(n.less, path+"-", depth);
        }
        dp.print("", depth);
        if (false) dp.print(path);
        dp.print(n.getString(stringStore));
        if (n.result != NOT_FOUND) dp.print("/" + n.result);
        dp.println();
        if (n.next != null) {
            showNodes2(n.next, path+".", depth+n.getString(stringStore).length());
        }
        if (n.greater != null) {
            showNodes2(n.greater, path+"+", depth);
        }
    }
     // Counters filled in by checkNodes(Node, NodeInfo) while it walks the tree.
     static class NodeInfo {
         // number of nodes visited
         int nodeCount;
         // nodes whose result is not NOT_FOUND
         int resultCount;
         // nodes without a less link
         int nullLessCount;
         // nodes without a greater link
         int nullGreaterCount;
         // nodes with neither a less nor a greater link and no result
         int nullSimpleCount;
         // nodes without a next link
         int nullNextCount;
     }
    public void checkNodes() {
        NodeInfo nodeInfo = new NodeInfo();
        checkNodes(base, nodeInfo);
        System.out.println("Nodes: " + nodeInfo.nodeCount);
        System.out.println("nullLessCount: " + nodeInfo.nullLessCount);
        System.out.println("nullGreaterCount: " + nodeInfo.nullGreaterCount);
        System.out.println("nullNextCount: " + nodeInfo.nullNextCount);
        System.out.println("resultCount: " + nodeInfo.resultCount);
        System.out.println("nullSimpleCount: " + nodeInfo.nullSimpleCount);
    }
    public void checkNodes(Node n, NodeInfo nodeInfo) {
        nodeInfo.nodeCount++;
        if (n.result != NOT_FOUND) nodeInfo.resultCount++;
        if (n.less != null) {
            checkNodes(n.less, nodeInfo);
        } else {
            nodeInfo.nullLessCount++;
            if (n.greater == null && n.result == NOT_FOUND) nodeInfo.nullSimpleCount++;
        }
        if (n.next != null) {
            checkNodes(n.next, nodeInfo);
        } else {
            nodeInfo.nullNextCount++;
        }
        if (n.greater != null) {
            checkNodes(n.greater, nodeInfo);
        } else {
            nodeInfo.nullGreaterCount++;
        }
    }
    final static class DepthPrinter {
        private PrintWriter pw;
        private int currentDepth = 0;
        private String leader = ".";
        DepthPrinter(PrintWriter pw) {
            this.pw = pw;
        }
        void print(char ch) {
            print(ch, 0);
        }
        void print(String s) {
            print(s, 0);
        }       
        void print(char ch, int depth) {
            print(String.valueOf(ch), depth);
        }
        void print(String s, int depth) {
            int delta = depth - currentDepth;
            if (delta > 0) {
                pw.print(Utility.repeat(leader, delta - 1));
                currentDepth = depth;
            }
            pw.print(s);
            currentDepth += s.length();
        }
        void println() {
            pw.println();
            currentDepth = 0;
        }
        void println(String s) {
            pw.print(s);
            pw.println();
            currentDepth = 0;
        }
    }
    final static class StringStore {
        // initially, there is a simple strategy
        private String buffer = "";
        private static final char TERMINATOR = '\u007E';
        private static final int PIECE_LENGTH = 5;
        private static String[] pieces = new String[50]; // HACK
        private static Set strings = new HashSet();
        public void add(String s) {
            strings.add(s);
        }
        public void compact() {
            System.out.println("Adding Pieces");
            // add all the pieces
            Iterator it = strings.iterator();
            Set additions = new HashSet();
            while (it.hasNext()) {
                String s = (String)it.next();
                int len = Utility.split(s, ' ', pieces);
                for (int i = 0; i < len; ++i) {
                    additions.add(pieces[i]);
                }
            }
            store(additions);
            store(strings);
        }
        private void store(Set stuff) {
            System.out.println("Sorting");
            // sort them by length, longest first
            Set ordered = new TreeSet();
            Iterator it = stuff.iterator();
            while (it.hasNext()) {
                String s = (String)it.next();
                ordered.add(new Pair(new Integer(-s.length()), s));
            }
            System.out.println("Storing");
            // add them
            it = ordered.iterator();
            while (it.hasNext()) {
                String s = (String)(((Pair)it.next()).second);
                get(s);
            }
        }
        private int get(String s) {
            System.out.println("Adding: \'" + s + "\'");
            int index;
            if (s.indexOf(' ') < 0) {
                index = addNoSplit(s);
                System.out.println("\tReturning: " + index);
                return index;
            }
            int len = Utility.split(s, ' ', pieces);
            StringBuffer itemCodes = new StringBuffer();
            for (int i = 0; i < len; ++i) {
                String piece = pieces[i];
                itemCodes.append((char)addNoSplit(piece));
                /*for (int j = 0; j < piece.length(); j += PIECE_LENGTH) {
                    int maxLen = j + PIECE_LENGTH;
                    if (maxLen > piece.length()) maxLen = piece.length();
                    itemCodes.append((char)addNoSplit(piece.substring(j, maxLen)));
                }*/
            }
            index = 0x8000 | addNoSplit(itemCodes.toString());   // mark it as composite
            System.out.println("\tReturning: " + index);
            return index;
        }
        private int addNoSplit(String s) {
            System.out.println("\tAdding2: \'" + s + "\'");
            String sTerm = s + TERMINATOR;
            int index = buffer.indexOf(sTerm);
            if (index >= 0) return index;
            index = buffer.length();
            buffer += sTerm;
            System.out.println("\t\tReturning2: " + index);
            return index;
        }
        /**
         * Rebuilds the string stored under a code.
         * A code without the 0x8000 bit is a plain buffer offset: the result is
         * the text from that offset up to the next TERMINATOR. A code with the
         * 0x8000 bit set is composite: the remaining bits are the offset of a
         * run of chars, each of which is itself a code that is resolved
         * recursively; the pieces are joined with single spaces.
         *
         * @param index code to resolve
         * @return the reconstructed string
         */
        public String get(int index) {
            System.out.println("Fetching: " + index);
            final boolean composite = (index & 0x8000) != 0;
            if (composite) {
                index &= ~0x8000; // strip the composite marker bit
            }
            final int end = buffer.indexOf(TERMINATOR, index);
            String result;
            if (!composite) {
                result = buffer.substring(index, end);
            } else {
                StringBuffer joined = new StringBuffer();
                for (int i = index; i < end; ++i) {
                    if (joined.length() != 0) joined.append(" ");
                    joined.append(get(buffer.charAt(i)));
                }
                result = joined.toString();
            }
            System.out.println("\tReturning: '" + result + "'");
            return result;
        }
        /**
         * Returns the raw backing buffer, i.e. every stored string followed by
         * its TERMINATOR, exactly as accumulated so far.
         */
        public String toString() {
            return buffer;
        }
    }
    final static class Builder {
        Map map = new TreeMap();
        String[] names;
        TernaryStore store;
        Set set = new TreeSet();
        /**
         * Registers a name together with the integer result it should map to.
         * A later call with the same name replaces the earlier result.
         *
         * @param name   key to be stored
         * @param result value associated with the key
         */
        public void add(String name, int result) {
            final Integer boxed = new Integer(result);
            map.put(name, boxed);
        }
        /**
         * Builds a TernaryStore from everything passed to add().
         * The keys are copied into an array in the map's iteration order, the
         * nodes are inserted by repeated midpoint splitting, the chains are
         * compacted, and finally the node strings are replaced through the
         * string store. The builder's working state (names, map, store) is
         * released before returning.
         *
         * @return the finished store
         */
        public TernaryStore build() {
            // flatten the keys into an array
            names = (String[]) map.keySet().toArray(new String[map.size()]);

            // build the nodes
            store = new TernaryStore();
            addNode(0, names.length);

            // the flat list and the map are no longer needed
            names = null;
            map.clear();

            System.out.println("compacting");
            compactStore(store.base);
            store.stringStore.compact();
            replaceStrings(store.base);

            // hand over the store and drop our own reference to it
            final TernaryStore built = store;
            store = null;
            return built;
        }
        /*
        void compactStrings(TernaryStore t) {
            // we have a set of Pairs, first is length, second is string
            // compact them, word by word
            Iterator it = set.iterator();
            while (it.hasNext()) {
                String string = ((String)((Pair)it.next()).second);
                int index = t.stringStore.add(string);
                if (true) {
                    System.out.println("Checking: " + index);
                    String reverse = t.stringStore.get(index);
                    if (!reverse.equals(string)) {
                        System.out.println("source: \'" + string + "\'");
                        System.out.println("reverse: \'" + reverse + "\'");
                        throw new IllegalArgumentException("Failed roundtrip");
                    }
                }
                map.put(string, new Integer(index));
            }
        }
        */
        /**
         * Walks the subtree rooted at n and, for every node, stores in
         * stringCode whatever store.stringStore.get(...) returns for the node's
         * current string, then clears the node's own string.
         * Recurses into the less, next and greater children, in that order.
         * NOTE(review): the overload of get() used here and the type of
         * stringCode are declared outside this section -- confirm against them.
         *
         * @param n root of the subtree to process; must not be null
         */
        public void replaceStrings(Node n) {
            n.stringCode = store.stringStore.get(n.getString(store.stringStore));
            n.setString(null); // the node no longer keeps its own copy
            if (n.less != null) replaceStrings(n.less);
            if (n.next != null) replaceStrings(n.next);
            if (n.greater != null) replaceStrings(n.greater);
        }
        /**
         * Compacts the subtree rooted at n.
         * While n carries no result (NOT_FOUND) and its next node has neither a
         * less nor a greater branch, the next node is folded into n: the strings
         * are concatenated, the result is taken over and the next node is
         * unlinked. The resulting string of n is then added to the string store,
         * and the less, next and greater children are compacted in that order.
         *
         * @param n root of the subtree to compact; must not be null
         */
        public void compactStore(Node n) {
            Node follower = n.next;
            for (;;) {
                final boolean mergeable = n.result == NOT_FOUND
                        && follower != null
                        && follower.greater == null
                        && follower.less == null;
                if (!mergeable) {
                    break;
                }
                n.setString(n.getString(store.stringStore) + follower.getString(store.stringStore));
                n.result = follower.result;
                // unlink the node that was just absorbed
                follower = follower.next;
                n.next = follower;
            }
            // register the (possibly merged) string of this node
            store.stringStore.add(n.getString(store.stringStore));
            if (n.less != null) {
                compactStore(n.less);
            }
            if (n.next != null) {
                compactStore(n.next);
            }
            if (n.greater != null) {
                compactStore(n.greater);
            }
        }
        /**
         * Inserts names[start..limit) into the store, middle element first and
         * then each half recursively.
         *
         * @param start first index to insert (inclusive)
         * @param limit end index (exclusive)
         */
        private void addNode(int start, int limit) {
            if (limit <= start) {
                return; // empty range
            }
            final int middle = (start + limit) / 2;
            final String key = names[middle];
            final Integer value = (Integer) map.get(key);
            addNode(key, value.intValue());
            addNode(start, middle);
            addNode(middle + 1, limit);
        }
        /**
         * Inserts one string with its result into the tree rooted at store.base.
         * Every node is compared by the first char of its string. On a match the
         * walk follows next and advances to the following input char; otherwise
         * it moves sideways through greater or less while staying on the same
         * input char. The first missing link is filled with a fresh chain from
         * addRest for the unmatched remainder of s.
         * If the whole of s is consumed along existing nodes, the result is
         * stored on the last node that matched.
         *
         * @param s      string to insert
         * @param result value to associate with s
         */
        private void addNode(String s, int result) {
            // empty tree: the whole string becomes the first chain
            if (store.base == null) {
                store.base = addRest(s, 0, result);
                return;
            }
            Node n = store.base;
            Node lastNode = n; // most recent node whose char matched
            for (int i = 0; i < s.length(); ++i) {
                char ch = s.charAt(i);
                while (true) {
                    char first = n.getString(store.stringStore).charAt(0);
                    if (ch == first) {
                        if (n.next == null) {
                            // nothing below this node yet: hang the remainder here
                            // (addRest returns null when no chars remain)
                            n.next = addRest(s, i+1, result);
                            return;
                        }
                        lastNode = n;
                        n = n.next;
                        break; // get next char
                    }
                    // otherwise branch sideways, keeping same char
                    if (ch > first) {
                        if (n.greater == null) {
                            n.greater = addRest(s, i, result);
                            return;
                        }
                        n = n.greater;
                    } else {
                        if (n.less == null) {
                            n.less = addRest(s, i, result);
                            return;
                        }
                        n = n.less;
                    }
                }
            }
            // s ended on an existing path: record the result on its last node
            lastNode.result = result;
        }
        /**
         * Creates a chain of one-char nodes, linked through next, for
         * s[position..]. Only the final node of the chain receives the result.
         *
         * @param s        source string
         * @param position index of the first char to turn into a node
         * @param result   value stored on the last node of the chain
         * @return head of the new chain, or null when no chars remain
         */
        private Node addRest(String s, int position, int result) {
            Node head = null;
            int i = s.length();
            // build back to front so that each new node can point at its successor
            while (--i >= position) {
                final Node link = new Node();
                link.setString(s.substring(i, i + 1));
                if (head == null) {
                    link.result = result; // this is the tail of the chain
                }
                link.next = head;
                head = link;
            }
            return head;
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/TestData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestData.java
--- a/tools/unicodetools/com/ibm/text/UCD/TestIdentifiers.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestIdentifiers.java
@ -1,378 +0,0 @@
 package com.ibm.text.UCD;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.XEquivalenceClass;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.text.Normalizer;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.text.utility.Utility;
 import com.ibm.icu.lang.UCharacter;
 public class TestIdentifiers {
 	/**
 	 * Debugging entry point. Loads the IDN character data and prints the size
 	 * of the "output" set and the set of non-starting characters, then returns.
 	 * The confusable tests over the sample strings below are currently
 	 * disabled by the unconditional return.
 	 *
 	 * @param args ignored
 	 * @throws IOException if a data file cannot be read
 	 */
 	public static void main(String[] args) throws IOException {
 		String[] tests = { "SØS", "façade", "MOPE", "VOP", "scope", "ibm", "vop",
 				"toys-я-us", "1iνе", "back", "boгing" };
 		TestIdentifiers ti = new TestIdentifiers("L");
 		TestIdentifiers tiany = new TestIdentifiers("A");
 		ti.loadIdentifiers();
 		UnicodeSet idnCharSet = ti.idnChars.getSet("output", new UnicodeSet());
 		System.out.println("idnCharSet: " + idnCharSet.size());
 		UnicodeSet idnCharNonStarting = ti.nonstarting;
 		// print the non-starting set itself (this used to print idnCharSet under this label)
 		System.out.println("idnCharNonStarting: " + idnCharNonStarting);
 		// everything below is intentionally switched off
 		if (true) return;
 		for (int i = 0; i < tests.length; ++i) {
 			System.out.print(tests[i]);
 			String folded = UCharacter.foldCase(tests[i], true);
 			if (folded.equals(tests[i])) {
 				ti.testItem(tests[i]);
 			} else {
 				// test the original with the any-case tables, the folded form with the lowercase ones
 				System.out.print("\t");
 				tiany.testItem(tests[i]);
 				System.out.print(folded);
 				ti.testItem(folded);
 			}
 			for (int j = 0; j < tests[i].length(); ++j) {
 				int cp = tests[i].charAt(j);
 				Set s = ti.getConfusables(cp, "MA");
 				System.out.println(Default.ucd().getCodeAndName(cp));
 				for (Iterator it = s.iterator(); it.hasNext();) {
 					System.out.println("\t= " + Default.ucd().getCodeAndName((String)it.next()));
 				}
 			}
 		}
 	}
 	/**
 	 * Prints a one-entry report for a string: the case type of this tester,
 	 * followed by the whole-script confusable scripts (if any), a
 	 * mixed-script notice (if applicable), or "no confusable".
 	 * The string is compatibility-decomposed before it is examined.
 	 *
 	 * @param test the string to examine
 	 */
 	void testItem(String test) {
 		final String decomposed = Normalizer.normalize(test, Normalizer.DECOMP_COMPAT);
 		final BitSet confusableScripts = new BitSet();
 		System.out.print("\t" + caseType + "\t");

 		final boolean wholeScript = hasWholeScriptConfusable(decomposed, confusableScripts);
 		if (wholeScript) {
 			System.out.print("whole-script confusables: ");
 			for (int code = confusableScripts.nextSetBit(0); code >= 0;
 					code = confusableScripts.nextSetBit(code + 1)) {
 				System.out.print(UScript.getName(code) + " ");
 			}
 			System.out.println();
 		}

 		final boolean mixedScript = hasMixedScriptConfusable(decomposed);
 		if (mixedScript) {
 			System.out.println("mixed-script confusable");
 		}

 		if (!wholeScript && !mixedScript) {
 			System.out.println("no confusable");
 		}
 	}
 	private static final String indir = "C:\\Unicode-CVS2\\draft\\reports\\tr36\\data\\";
 	private static UnicodeSet commonAndInherited = new UnicodeSet(
 			"[[:script=common:][:script=inherited:]]");
 	private static UnicodeSet XIDContinueSet = new UnicodeSet("[:xidcontinue:]")
 			.add('-');
 	private static final boolean DEBUG = false;
 	private String caseType;
 	/**
 	 * Creates a tester for one case type and immediately loads the
 	 * whole-script confusable data, keeping only entries of that type.
 	 *
 	 * @param caseType type label used both for reporting and as the load filter
 	 * @throws IOException if the data file cannot be read
 	 */
 	TestIdentifiers(String caseType) throws IOException {
 		this.caseType = caseType;
 		loadWholeScriptConfusables(caseType);
 	}
 	/**
 	 * Simple pair of a character set and a script code. The setters return
 	 * this object so that calls can be chained.
 	 */
 	private static class UnicodeSetToScript {
 		private UnicodeSet set;
 		private int script;

 		/** @return the script code of this pair */
 		public int getScript() {
 			return this.script;
 		}

 		/** @return the character set of this pair */
 		public UnicodeSet getSet() {
 			return this.set;
 		}

 		/**
 		 * @param script the script code to store
 		 * @return this object
 		 */
 		public UnicodeSetToScript setScript(int script) {
 			this.script = script;
 			return this;
 		}

 		/**
 		 * @param set the character set to store
 		 * @return this object
 		 */
 		public UnicodeSetToScript setSet(UnicodeSet set) {
 			this.set = set;
 			return this;
 		}
 	}
 	UnicodeSetToScript[][] scriptToUnicodeSetToScript = new UnicodeSetToScript[UScript.CODE_LIMIT][];
 	UnicodeSet[] fastReject = new UnicodeSet[UScript.CODE_LIMIT];
 	UnicodeMap idnChars = new UnicodeMap();
 	UnicodeSet nonstarting = new UnicodeSet();
 	/**
 	 * Reads "idnchars.txt" from indir. Each data line has the form
 	 * "range ; type", where range is a hex code point or "start..end".
 	 * Ranges of type "nonstarting" are added to nonstarting; every other
 	 * range is recorded in idnChars under its type.
 	 * The reader is closed even when parsing fails.
 	 *
 	 * @throws IOException if the file cannot be opened or closed
 	 * @throws RuntimeException wrapping any failure while reading or parsing,
 	 *         with the offending line in the message
 	 */
 	void loadIdentifiers() throws IOException {
 		BufferedReader br = BagFormatter.openUTF8Reader(indir,
 				"idnchars.txt");
 		String line = null;
 		try {
 			while (true) {
 				line = Utility.readDataLine(br);
 				if (line == null)
 					break;
 				if (line.length() == 0)
 					continue;
 				String[] pieces = Utility.split(line, ';');
 				// part 0 is range
 				String range = pieces[0].trim();
 				int rangeDivider = range.indexOf("..");
 				int start, end;
 				if (rangeDivider < 0) {
 					start = end = Integer.parseInt(range, 16);
 				} else {
 					start = Integer.parseInt(range.substring(0, rangeDivider),
 							16);
 					end = Integer.parseInt(range.substring(rangeDivider + 2),
 							16);
 				}
 				// part 1 is the type of the range
 				String type = pieces[1].trim().intern();
 				if (type.equals("nonstarting")) nonstarting.add(start,end);
 				else idnChars.putAll(start, end, type);
 			}
 		} catch (Exception e) {
 			throw (RuntimeException) new RuntimeException("Failure on line "
 					+ line).initCause(e);
 		} finally {
 			// release the file on the failure path as well
 			br.close();
 		}
 	}
 	Map type_equivalences;
 	/**
 	 * Reads "confusables.txt" from indir. Each data line has the form
 	 * "source ; target ; type" with source and target in hex. For every type
 	 * an equivalence class is built that links each source to its target.
 	 * The result is published in type_equivalences only after the whole file
 	 * has been read, so a failed load never leaves a half-filled table behind.
 	 * The reader is closed even when parsing fails.
 	 *
 	 * @throws IOException if the file cannot be opened or closed
 	 * @throws RuntimeException wrapping any failure while reading or parsing,
 	 *         with the offending line in the message
 	 */
 	void loadConfusables() throws IOException {
 		BufferedReader br = BagFormatter.openUTF8Reader(indir,
 				"confusables.txt");
 		String line = null;
 		Map loaded = new HashMap();
 		try {
 			while (true) {
 				line = Utility.readDataLine(br);
 				if (line == null)
 					break;
 				if (line.length() == 0)
 					continue;
 				String[] pieces = Utility.split(line, ';');
 				// part 0 is the source string
 				String s = Utility.fromHex(pieces[0].trim());
 				// part 1 is the target string
 				String t = Utility.fromHex(pieces[1].trim());
 				// part 2 is the confusable type
 				String type = pieces[2].trim();
 				XEquivalenceClass ec = (XEquivalenceClass) loaded.get(type);
 				if (ec == null) loaded.put(type, ec = new XEquivalenceClass(""));
 				ec.add(s, t);
 				//System.out.println(type + ": " + Default.ucd().getCodeAndName(s) + " => " + Default.ucd().getCodeAndName(t));
 			}
 		} catch (Exception e) {
 			throw (RuntimeException) new RuntimeException("Failure on line "
 					+ line).initCause(e);
 		} finally {
 			// release the file on the failure path as well
 			br.close();
 		}
 		type_equivalences = loaded;
 	}
 	/**
 	 * Returns the strings recorded as equivalent to a code point for the
 	 * given confusable type. The confusables table is loaded on first use.
 	 *
 	 * @param cp   code point to look up
 	 * @param type confusable type, as it appears in the data file
 	 * @return the equivalences, or null if the table could not be loaded
 	 *         because of an IOException
 	 */
 	public Set getConfusables(int cp, String type) {
 		if (type_equivalences == null) {
 			try {
 				loadConfusables();
 			} catch (IOException e) {
 				return null;
 			}
 		}
 		final XEquivalenceClass forType = (XEquivalenceClass) type_equivalences.get(type);
 		return forType.getEquivalences(UTF16.valueOf(cp));
 	}
 	/**
 	 * Reads "confusablesWholeScript.txt" from indir and fills
 	 * scriptToUnicodeSetToScript and fastReject.
 	 * Each data line has the form "range ; script1 ; script2 ; type"; lines
 	 * whose type differs from filterType are ignored. For every source script
 	 * that has data, scriptToUnicodeSetToScript holds one (set, target script)
 	 * pair per target script, and fastReject holds the complement of the
 	 * union of those sets.
 	 * The reader is closed even when parsing fails.
 	 *
 	 * @param filterType only lines of this type are used
 	 * @throws IOException if the file cannot be opened or closed
 	 * @throws RuntimeException wrapping any failure while reading or parsing,
 	 *         with the offending line in the message
 	 */
 	void loadWholeScriptConfusables(String filterType) throws IOException {
 		// the two-dimensional allocation already creates every row
 		UnicodeSet[][] script_script_set = new UnicodeSet[UScript.CODE_LIMIT][UScript.CODE_LIMIT];
 		BufferedReader br = BagFormatter.openUTF8Reader(indir,
 				"confusablesWholeScript.txt");
 		String line = null;
 		try {
 			while (true) {
 				line = Utility.readDataLine(br);
 				if (line == null)
 					break;
 				if (line.length() == 0)
 					continue;
 				String[] pieces = Utility.split(line, ';');
 				// part 0 is range
 				String range = pieces[0].trim();
 				int rangeDivider = range.indexOf("..");
 				int start, end;
 				if (rangeDivider < 0) {
 					start = end = Integer.parseInt(range, 16);
 				} else {
 					start = Integer.parseInt(range.substring(0, rangeDivider),
 							16);
 					end = Integer.parseInt(range.substring(rangeDivider + 2),
 							16);
 				}
 				// part 1 is script1
 				int script1 = UScript.getCodeFromName(pieces[1].trim());
 				// part 2 is script2
 				int script2 = UScript.getCodeFromName(pieces[2].trim());
 				// part 3 is the type, used as a filter
 				String type = pieces[3].trim();
 				if (!type.equals(filterType))
 					continue;
 				if (script_script_set[script1][script2] == null) {
 					script_script_set[script1][script2] = new UnicodeSet();
 				}
 				script_script_set[script1][script2].add(start, end);
 			}
 			// turn the sparse matrix into per-script lists plus a quick-reject set
 			for (int i = 0; i < script_script_set.length; ++i) {
 				UnicodeSet accept = new UnicodeSet();
 				List curr = new ArrayList();
 				for (int j = 0; j < script_script_set[i].length; ++j) {
 					if (script_script_set[i][j] == null)
 						continue;
 					accept.addAll(script_script_set[i][j]);
 					curr.add(new UnicodeSetToScript().setScript(j).setSet(
 							script_script_set[i][j]));
 					if (DEBUG && i == UScript.LATIN)
 						System.out.println(UScript.getName(i) + "; "
 								+ UScript.getName(j) + "; "
 								+ script_script_set[i][j]);
 				}
 				if (curr.size() == 0)
 					continue;
 				scriptToUnicodeSetToScript[i] = (UnicodeSetToScript[]) curr
 						.toArray(new UnicodeSetToScript[curr.size()]);
 				fastReject[i] = accept.complement();
 				if (DEBUG && i == UScript.LATIN)
 					System.out.println(UScript.getName(i) + "; "
 							+ fastReject[i]);
 			}
 		} catch (Exception e) {
 			throw (RuntimeException) new RuntimeException("Failure on line "
 					+ line).initCause(e);
 		} finally {
 			// release the file on the failure path as well
 			br.close();
 		}
 	}
 	/**
 	 * Tests whether a single-script string has at least one whole-script
 	 * confusable. Strings for which getSingleScript reports
 	 * UScript.INVALID_CODE are rejected without touching resultingScripts.
 	 * Common and Inherited characters are removed before the lookup.
 	 *
 	 * @param givenString      string to test
 	 * @param resultingScripts receives the scripts that have a confusable
 	 * @return true if at least one script was found
 	 */
 	boolean hasWholeScriptConfusable(String givenString, BitSet resultingScripts) {
 		final int script = getSingleScript(givenString);
 		if (script != UScript.INVALID_CODE) {
 			final UnicodeSet significant = new UnicodeSet()
 					.addAll(givenString)
 					.removeAll(commonAndInherited);
 			return hasWholeScriptConfusable(script, significant, resultingScripts);
 		}
 		return false;
 	}
 	/**
 	 * Finds the scripts in which every character of givenSet has a
 	 * whole-script confusable, starting from givenScript.
 	 * resultingScripts is always cleared first. The lookup fails fast when
 	 * there is no data for givenScript or when givenSet contains a character
 	 * from the fast-reject set of that script.
 	 *
 	 * @param givenScript      script code of the characters
 	 * @param givenSet         characters to test
 	 * @param resultingScripts receives the matching target scripts
 	 * @return true if at least one target script matched
 	 */
 	private boolean hasWholeScriptConfusable(int givenScript,
 			UnicodeSet givenSet, BitSet resultingScripts) {
 		resultingScripts.clear();
 		final UnicodeSet rejects = fastReject[givenScript];
 		if (rejects == null) {
 			return false;
 		}
 		if (rejects.containsSome(givenSet)) {
 			return false;
 		}
 		final UnicodeSetToScript[] candidates = scriptToUnicodeSetToScript[givenScript];
 		for (int k = 0; k < candidates.length; ++k) {
 			final UnicodeSetToScript candidate = candidates[k];
 			if (candidate.getSet().containsAll(givenSet)) {
 				resultingScripts.set(candidate.getScript());
 			}
 		}
 		return !resultingScripts.isEmpty();
 	}
 	/*
 	 * Tests for a mixed-script confusable. Common and Inherited characters
 	 * are removed and the rest is grouped by script. For each script i that
 	 * is present, every other present script j is checked: the characters of
 	 * j must have a whole-script confusable whose target scripts include i.
 	 * The first script i for which all other scripts pass decides the
 	 * answer: true if there was at least one other script, false if i was
 	 * the only script present. Returns false if no script passes.
 	 */
 	boolean hasMixedScriptConfusable(String givenString) {
 		UnicodeSet givenSet = new UnicodeSet().addAll(givenString).removeAll(
 				commonAndInherited);
 		UnicodeSet[] byScript = getScripts(givenSet);
 		BitSet wholeScripts = new BitSet();
 		boolean result = false;
 		main: for (int i = 0; i < byScript.length; ++i) {
 			if (byScript[i] == null)
 				continue;
 			// see if the other characters have whole script confusables in
 			// my script
 			for (int j = 0; j < byScript.length; ++j) {
 				if (j == i || byScript[j] == null)
 					continue;
 				if (!hasWholeScriptConfusable(j, byScript[j], wholeScripts))
 					continue main;
 				if (!wholeScripts.get(i))
 					continue main; // script i is not among the targets,
 				// so try the next candidate script
 				result = true;
 			}
 			return result; // passed the gauntlet
 		}
 		return false;
 	}
 	/*
 	 * Returns UScript.INVALID_CODE if mixed script, otherwise the script.
 	 * The string is walked by code point. A Common or Inherited character
 	 * that is in XIDContinueSet is skipped, except that its script is
 	 * recorded when no script has been recorded yet. A Common or Inherited
 	 * character outside XIDContinueSet is treated as Common and compared
 	 * like any other character. An empty string yields
 	 * UScript.INVALID_CODE, because nothing is ever recorded.
 	 * NOTE(review): once Common/Inherited has been recorded from a leading
 	 * character, a following character of another script makes the result
 	 * INVALID_CODE; and the "skip if not identifier" remark below sits on
 	 * the branch taken for characters that ARE in XIDContinueSet -- confirm
 	 * that both are intended.
 	 */
 	public static int getSingleScript(String source) {
 		int lastScript = UScript.INVALID_CODE;
 		int cp;
 		// cp is assigned in the body before the increment expression reads it
 		for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
 			cp = UTF16.charAt(source, i);
 			int script = UScript.getScript(cp);
 			if (script == UScript.COMMON || script == UScript.INHERITED) {
 				if (XIDContinueSet.contains(cp)) {
 					if (lastScript == UScript.INVALID_CODE)
 						lastScript = script;
 					continue; // skip if not identifier
 				}
 				script = UScript.COMMON;
 			}
 			if (lastScript == UScript.INVALID_CODE)
 				lastScript = script;
 			else if (script != lastScript)
 				return UScript.INVALID_CODE;
 		}
 		return lastScript;
 	}
 	/**
 	 * Splits a set of code points by script.
 	 *
 	 * @param sourceSet code points to distribute
 	 * @return array indexed by script code; an entry is null when the set
 	 *         has no code point of that script
 	 */
 	public static UnicodeSet[] getScripts(UnicodeSet sourceSet) {
 		final UnicodeSet[] buckets = new UnicodeSet[UScript.CODE_LIMIT];
 		final UnicodeSetIterator walker = new UnicodeSetIterator(sourceSet);
 		while (walker.next()) {
 			final int code = UScript.getScript(walker.codepoint);
 			UnicodeSet bucket = buckets[code];
 			if (bucket == null) {
 				// first code point seen for this script
 				bucket = new UnicodeSet();
 				buckets[code] = bucket;
 			}
 			bucket.add(walker.codepoint);
 		}
 		return buckets;
 	}
 }
--- a/tools/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java
@ -1,187 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java,v $
 * $Date: 2004/10/14 17:54:56 $
 * $Revision: 1.3 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import com.ibm.text.utility.*;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
 import com.ibm.icu.text.UnicodeSet;
 public class TestNameUniqueness implements UCD_Types {
    /**
     * Entry point: runs the name-list collision check. The per-code-point
     * check (checkNames) is currently disabled.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception {
        checkNameList();
        // new TestNameUniqueness().checkNames();
    }
    Map names = new HashMap();
    int[] charCount = new int[128];
    int[] samples = new int[128];
    /**
     * Iterates over the character names found in a fixed list of data files,
     * one file after the other. After each successful call to next(), the
     * field line holds the raw input line the name was taken from.
     */
    public static class NameIterator {
        /** Index into files of the file being read; -1 before the first call. */
        int fileCount = -1;
        /** Raw text of the most recently read line. */
        String line;
        BufferedReader br;
        /** Scratch array for the split calls. */
        String[] pieces = new String[3];
        /** Directory and file name of each input, in reading order. */
        static String[][] files = {
                {"C:\\DATA\\", "pdam1040630.lst"},
                {"C:\\DATA\\UCD\\4.1.0-Update\\", "NamedCompositeEntities-4.1.0d2.txt"}
        };
        /**
         * Returns the next name. Lines of the first file are used only when
         * they start with an uppercase hex digit; the name is the second
         * tab-separated field, cut at the first '(' and then at the first '*'.
         * For the other files the name is the second ';'-separated field.
         * Empty lines are skipped. A file that is exhausted is closed before
         * the next one is opened.
         *
         * @return null when done, or when a file cannot be read
         */
        public String next() {
            while (true) {
                try {
                    line = (br != null) ? br.readLine() : null;
                    while (line == null) {
                        // current file exhausted (or nothing opened yet): move on
                        if (br != null) {
                            br.close();
                            br = null;
                        }
                        if (fileCount + 1 >= files.length) {
                            return null; // no more files
                        }
                        fileCount++;
                        br = BagFormatter.openReader(files[fileCount][0], files[fileCount][1], "ISO-8859-1");
                        line = br.readLine();
                    }
                } catch (IOException e) {
                    // treat an unreadable file as the end of the data, but say so,
                    // and never hand out a stale line again
                    System.err.println("NameIterator: stopping after read failure: " + e);
                    line = null;
                    return null;
                }
                if (line.length() == 0) continue;
                if (fileCount == 0) {
                    char c = line.charAt(0);
                    // skip if doesn't start with hex digit
                    if (!(('0' <= c && c <= '9') || ('A' <= c && c <= 'F'))) continue;
                    Utility.split(line,'\t',pieces,true);
                    Utility.split(pieces[1],'(',pieces,true);
                    Utility.split(pieces[0],'*',pieces,true);
                    return pieces[0];
                } else {
                    Utility.split(line,';',pieces,true);
                    return pieces[1];
                }
            }
        }
    }
    /**
     * Reads every name delivered by a NameIterator and reports collisions
     * between name skeletons. Names starting with '<' are used as their own
     * key and are never reported. In addition, a sample entry is printed for
     * lines starting with "116C" or "1180", for names containing '-', and
     * for every 1000th name (starting with the first).
     *
     * @throws IOException declared for callers; not thrown by the visible code
     */
    public static void checkNameList() throws IOException {
        Map map = new HashMap();
        NameIterator nameIterator = new NameIterator();
        int lineCount = 0;
        while (true) {
            String name = nameIterator.next();
            if (name == null) break;
            // count every name; the counter must not depend on the other
            // sampling conditions below short-circuiting
            boolean everyThousandth = (lineCount++ % 1000) == 0;
            String key;
            try {
                if (name.startsWith("<")) key = name;
                else key = UnicodeProperty.toNameSkeleton(name);
            } catch (RuntimeException e) {
                System.out.println("Error on " + nameIterator.line);
                throw e;
            }
            Object value = map.get(key);
            if (value != null && !key.startsWith("<")) {
                System.out.println("*!*!*!* Collision at " + key + " between: ");
                System.out.println("\t" + value);
                System.out.println("\t" + nameIterator.line);
                //throw new IllegalArgumentException();
            }
            map.put(key, nameIterator.line);
            if (nameIterator.line.startsWith("116C")
                || nameIterator.line.startsWith("1180")
                || name.indexOf('-') >= 0
                || everyThousandth) {
                System.out.println("[" + lineCount + "]\t" + nameIterator.line + "\t" + name);
                System.out.println("\t" + name);
                System.out.println("\t" + key);
            }
        }
    }
    /**
     * Writes "name_uniqueness.txt" with three sections: collisions between
     * processed names of allocated code points (skipping computable names and
     * control characters), one sample per character that occurred in a
     * processed name, and the processed names of the non-control code points
     * below 256.
     *
     * @throws IOException if the output file cannot be opened
     */
    void checkNames() throws IOException {
        PrintWriter out = Utility.openPrintWriter("name_uniqueness.txt", Utility.LATIN1_WINDOWS);
        try {
            out.println("Collisions");
            out.println();
            // cover the whole code space, including the last code point U+10FFFF
            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
                Utility.dot(cp);
                if (!Default.ucd().isAllocated(cp)) continue;
                if (Default.ucd().hasComputableName(cp)) continue;
                int cat = Default.ucd().getCategory(cp);
                if (cat == Cc) continue;
                String name = Default.ucd().getName(cp);
                String processedName = processName(cp, name);
                Integer existing = (Integer) names.get(processedName);
                if (existing != null) {
                    out.println("Collision between: "
                        + Default.ucd().getCodeAndName(existing.intValue())
                        + ", " + Default.ucd().getCodeAndName(cp));
                } else {
                    names.put(processedName, new Integer(cp));
                }
            }
            out.println();
            out.println("Samples");
            out.println();
            for (int i = 0; i < charCount.length; ++i) {
                int count = charCount[i];
                if (count == 0) continue;
                String sampleName = Default.ucd().getCodeAndName(samples[i]);
                out.println(count + "\t'" + ((char)i)
                    + "'\t" + sampleName
                    + "\t=>\t" + processName(samples[i], Default.ucd().getName(samples[i])));
            }
            out.println();
            out.println("Name Samples");
            out.println();
            for (int i = 0; i < 256; ++i) {
                int cat = Default.ucd().getCategory(i);
                if (cat == Cc) continue;
                out.println(Default.ucd().getCodeAndName(i)
                    + "\t=>\t" + processName(i, Default.ucd().getName(i)));
            }
        } finally {
            out.close();
        }
    }
    static final String[][] replacements = {
        //{"SMALL LETTER", ""},
        {"LETTER", ""},
        {"CHARACTER", ""},
        {"DIGIT", ""},
        {"SIGN", ""},
        //{"WITH", ""},
    };
    StringBuffer processNamesBuffer = new StringBuffer();
    /**
     * Reduces a character name for comparison: the words listed in
     * replacements are substituted first, then everything except 'A'-'Z' and
     * '0'-'9' is dropped. As a side effect each char of the substituted name
     * is counted in charCount, and codePoint is stored in samples for a char
     * that has no sample yet.
     *
     * @param codePoint code point the name belongs to
     * @param name      the character name
     * @return the reduced name; the substituted name itself when nothing was dropped
     */
    String processName(int codePoint, String name) {
        final String substituted = Utility.replace(name, replacements);
        final int length = substituted.length();
        processNamesBuffer.setLength(0);
        for (int pos = 0; pos < length; ++pos) {
            final char c = substituted.charAt(pos);
            charCount[c]++;
            if (samples[c] == 0) {
                samples[c] = codePoint;
            }
            final boolean upper = c >= 'A' && c <= 'Z';
            final boolean digit = c >= '0' && c <= '9';
            if (upper || digit) {
                processNamesBuffer.append(c);
            }
        }
        return processNamesBuffer.length() == length
                ? substituted
                : processNamesBuffer.toString();
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java
@ -1,246 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $
 * $Date: 2004/02/12 08:23:16 $
 * $Revision: 1.8 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 import java.util.*;
 import java.io.*;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.text.utility.*;
 public final class TestNormalization {
    static final String DIR = "C:\\Documents and Settings\\Davis\\My Documents\\UnicodeData\\Update 3.0.1\\";
    static final boolean SKIP_FILE = true;
    static PrintWriter out = null;
    static BufferedReader in = null;
    static BitSet charsListed = new BitSet(0x110000);
    static int errorCount = 0;
    static int lineErrorCount = 0;
    static String originalLine = "";
    static String lastLine = "";
    /**
     * Runs the normalization conformance test. After two quick sanity checks
     * it reads DIR + "NormalizationTest.txt", verifies the NFC/NFD/NFKC/NFKD
     * invariants between the five columns of every data line, and then checks
     * all code points that did not appear alone in column 1. Differences are
     * written to "NormalizationTestLog.txt"; totals go to standard output.
     * Both files are closed on every exit path.
     *
     * @param args ignored
     * @throws java.io.IOException if a file cannot be opened or read
     */
    public static void main(String[] args)  throws java.io.IOException {
        System.out.println("Creating Normalizers");
        String[] testSet = {"a\u0304\u0328", "a\u0328\u0304"};
        for (int i = 0; i < testSet.length; ++i) {
            String s = testSet[i];
            boolean test = Default.nfc().isFCD(s);
            System.out.println(test + ": " + Default.ucd().getCodeAndName(s));
        }
        // NOTE: the log is not open yet, so a difference found by these four
        // checks surfaces as an exception from check() rather than a log entry
        String x = UTF32.valueOf32(0x10000);
        check("NFC", Default.nfc(), x);
        check("NFD", Default.nfd(), x);
        check("NFKC", Default.nfkc(), x);
        check("NFKD", Default.nfkd(), x);
        try {
            // open inside the try so that the log is closed even if the
            // test file cannot be opened
            out = new PrintWriter(
                new BufferedWriter(
                new OutputStreamWriter(
                    new FileOutputStream("NormalizationTestLog.txt"),
                    "UTF8"),
                32*1024));
            in = new BufferedReader (
                new FileReader (DIR + "NormalizationTest.txt"),
                32*1024);
            String[] parts = new String[10];
            System.out.println("Checking files");
            int count = 0;
            while (true) {
                String line = in.readLine();
                if (line == null) break;
                // progress marker every 1024 lines
                if ((count++ & 0x3FF) == 0) System.out.println("#LINE: " + line);
                originalLine = line;
                // strip the trailing comment
                int pos = line.indexOf('#');
                if (pos >= 0) {
                    line = line.substring(0,pos);
                }
                line = line.trim();
                if (line.length() == 0) continue;
                int splitCount = Utility.split(line, ';', parts);
                // FIX check splitCount
                for (int i = 0; i < splitCount; ++i) {
                    parts[i] = Utility.fromHex(parts[i]);
                }
                // remember single code points so that checkMissing can skip them
                if (UTF32.length32(parts[0]) == 1) {
                    int code = UTF32.char32At(parts[0],0);
                    charsListed.set(code);
                    if ((code & 0x3FF) == 0) System.out.println("# " + Utility.hex(code));
                }
                // c2 == NFC(c1) == NFC(c2) == NFC(c3)
                errorCount += check("NFCa", Default.nfc(), parts[1], parts[0]);
                errorCount += check("NFCb", Default.nfc(), parts[1], parts[1]);
                errorCount += check("NFCc", Default.nfc(), parts[1], parts[2]);
                // c4 == NFC(c4) == NFC(c5)
                errorCount += check("NFCd", Default.nfc(), parts[3], parts[3]);
                errorCount += check("NFCe", Default.nfc(), parts[3], parts[4]);
                // c3 == NFD(c1) == NFD(c2) == NFD(c3)
                errorCount += check("NFDa", Default.nfd(), parts[2], parts[0]);
                errorCount += check("NFDb", Default.nfd(), parts[2], parts[1]);
                errorCount += check("NFDc", Default.nfd(), parts[2], parts[2]);
                // c5 == NFD(c4) == NFD(c5)
                errorCount += check("NFDd", Default.nfd(), parts[4], parts[3]);
                errorCount += check("NFDe", Default.nfd(), parts[4], parts[4]);
                // c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
                errorCount += check("NFKCa", Default.nfkc(), parts[3], parts[0]);
                errorCount += check("NFKCb", Default.nfkc(), parts[3], parts[1]);
                errorCount += check("NFKCc", Default.nfkc(), parts[3], parts[2]);
                errorCount += check("NFKCd", Default.nfkc(), parts[3], parts[3]);
                errorCount += check("NFKCe", Default.nfkc(), parts[3], parts[4]);
                // c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
                errorCount += check("NFKDa", Default.nfkd(), parts[4], parts[0]);
                errorCount += check("NFKDb", Default.nfkd(), parts[4], parts[1]);
                errorCount += check("NFKDc", Default.nfkd(), parts[4], parts[2]);
                errorCount += check("NFKDd", Default.nfkd(), parts[4], parts[3]);
                errorCount += check("NFKDe", Default.nfkd(), parts[4], parts[4]);
            }
            System.out.println("Total errors in file: " + errorCount
                + ", lines: " + lineErrorCount);
            errorCount = lineErrorCount = 0;
            System.out.println("Checking Missing");
            checkMissing();
            System.out.println("Total errors in unlisted items: " + errorCount
                + ", lines: " + lineErrorCount);
        } finally {
            if (in != null) in.close();
            if (out != null) out.close();
        }
    }
    static String lastBase = "";
    /**
     * Normalizes other with n and compares the outcome with base. A
     * difference is written to the log, together with the source line the
     * first time that line produces a difference. lineErrorCount is
     * incremented whenever the expected value differs from the one of the
     * previously reported difference.
     *
     * @param type  label of the check, used in the report
     * @param n     normalizer to apply
     * @param base  expected result
     * @param other input to normalize
     * @return 1 if a difference was reported, 0 otherwise
     * @throws ChainException wrapping any exception raised while checking
     */
    public static int check(String type, Normalizer n, String base, String other) {
        try {
            final String actual = n.normalize(other);
            if (actual.equals(base)) {
                return 0;
            }
            // mention the source line only once
            String lineNote = "";
            if (!lastLine.equals(originalLine)) {
                lineNote = "// " + originalLine;
                lastLine = originalLine;
            }
            if (!base.equals(lastBase)) {
                lastBase = base;
                lineErrorCount++;
            }
            // name the input only when it differs from the expected value
            final String inputNote = base.equals(other)
                    ? ""
                    : "(" + Default.ucd().getCodeAndName(other) + ")";
            final String report = "DIFF " + type + ": "
                    + Default.ucd().getCodeAndName(base) + " != "
                    + type
                    + inputNote
                    + " == " + Default.ucd().getCodeAndName(actual)
                    + lineNote;
            out.println(report);
            return 1;
        } catch (Exception e) {
            throw new ChainException("DIFF " + type + ": "
                + Default.ucd().getCodeAndName(base) + " != "
                + type + "(" + Default.ucd().getCodeAndName(other) + ")", new Object[]{}, e);
        }
    }
    /**
     * Checks that base is unchanged by the normalizer, i.e. runs the
     * four-argument check with base as both the expected value and the input.
     *
     * @return 1 if a difference was reported, 0 otherwise
     */
    public static int check(String type, Normalizer n, String base) {
        return check(type, n, base, base);
    }
    /**
     * Checks every code point that was not listed on its own in the test
     * file: each of them must be unchanged by all four normalization forms.
     * Differences are added to errorCount. The whole code space up to and
     * including U+10FFFF is covered.
     */
    static void checkMissing() {
        for (int missing = 0; missing <= 0x10FFFF; ++missing) {
            // progress marker every 0x1000 code points
            if ((missing & 0xFFF) == 0) System.out.println("# " + Utility.hex(missing));
            if (charsListed.get(missing)) continue;
            String x = UTF32.valueOf32(missing);
            errorCount += check("NFC", Default.nfc(), x);
            errorCount += check("NFD", Default.nfd(), x);
            errorCount += check("NFKC", Default.nfkc(), x);
            errorCount += check("NFKD", Default.nfkd(), x);
        }
    }
    /**
     * Prints, for every leading code point, the set of trailing code points
     * with combining class 0 that it composes with under NFC. Leading code
     * points are then grouped by identical follower sets, and the groups are
     * printed as HTML table rows.
     */
    public static void checkStarters () {
        System.out.println("Checking Starters");
        final UnicodeSet leadSet = new UnicodeSet();
        final UnicodeSet trailStarterSet = new UnicodeSet();
        for (int cp = 0; cp <= 0x10FFFF; ++cp) {
            if (Default.nfc().isLeading(cp)) {
                leadSet.add(cp);
            }
            // only trailing code points with combining class 0 are of interest
            if (Default.ucd().getCombiningClass(cp) == 0 && Default.nfc().isTrailing(cp)) {
                trailStarterSet.add(cp);
            }
        }
        System.out.println("Leading: " + leadSet.size());
        System.out.println("Trailing Starters: " + trailStarterSet.size());

        final UnicodeSetIterator leadIt = new UnicodeSetIterator(leadSet);
        final UnicodeSetIterator trailIt = new UnicodeSetIterator(trailStarterSet);
        final UnicodeSet composing = new UnicodeSet();
        final Map leadsByFollowers = new TreeMap(new CompareProperties.UnicodeSetComparator());
        while (leadIt.next()) {
            trailIt.reset();
            composing.clear();
            while (trailIt.next()) {
                final int composed = Default.nfc().getComposition(leadIt.codepoint, trailIt.codepoint);
                if (composed != 0xFFFF) {
                    composing.add(trailIt.codepoint);
                }
            }
            if (composing.size() == 0) {
                continue;
            }
            System.out.println(Default.ucd().getCode(leadIt.codepoint)
                + "\t" + composing.toPattern(true));
            UnicodeSet group = (UnicodeSet) leadsByFollowers.get(composing);
            if (group == null) {
                group = new UnicodeSet();
                // the working set is reused, so the key must be a copy
                leadsByFollowers.put(composing.clone(), group);
            }
            group.add(leadIt.codepoint);
        }

        final Iterator keys = leadsByFollowers.keySet().iterator();
        final BagFormatter bf = new BagFormatter();
        bf.setLineSeparator("<br>");
        bf.setLabelSource(null);
        bf.setAbbreviated(true);
        while (keys.hasNext()) {
            final UnicodeSet followerSet = (UnicodeSet) keys.next();
            final UnicodeSet leads = (UnicodeSet) leadsByFollowers.get(followerSet);
            System.out.println("<tr><td>"
                + bf.showSetNames(leads)
                + "</td><td>"
                + bf.showSetNames(followerSet)
                + "</td></tr>");
        }
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/TestUnicodeInvariants.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestUnicodeInvariants.java
@ -1,259 +0,0 @@
 package com.ibm.text.UCD;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.text.ParseException;
 import java.text.ParsePosition;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.Tabber;
 import com.ibm.icu.dev.test.util.TransliteratorUtilities;
 import com.ibm.icu.dev.tool.UOption;
 import com.ibm.icu.text.SymbolTable;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeMatcher;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.text.utility.Utility;
 public class TestUnicodeInvariants {
    // Indices into the options array below; each constant is the position of the
    // corresponding entry, so the two declarations must be kept in the same order.
    private static final int
    // position of UOption.HELP_H(); not referenced elsewhere in this class
    HELP1 = 0,
    // "file" / -f: name of the invariants file to read (takes an argument)
    FILE = 1,
    // "norange" / -n: when present, main passes doRange == false to testInvariants
    RANGE = 2,
    // "table" / -t: when present, the BagFormatters are given a Tabber.HTMLTabber
    TABLE = 3
    ;
    // Command-line options understood by main, parsed with UOption.parseArgs.
    private static final UOption[] options = {
        UOption.HELP_H(),
        UOption.create("file", 'f', UOption.REQUIRES_ARG),
        UOption.create("norange", 'n', UOption.NO_ARG),
        UOption.create("table", 't', UOption.NO_ARG),
    };
    /**
     * Command-line entry point: parses the options, echoes the effective settings to
     * standard output, and runs the invariant tests.
     * @param args command-line arguments, parsed against the options table
     * @throws IOException propagated from testInvariants
     */
    public static void main(String[] args) throws IOException {
        UOption.parseArgs(args, options);
        // "norange" switches range merging off, so the flag is the negation of the option
        final boolean mergeRanges = !options[RANGE].doesOccur;
        final String invariantsFile = options[FILE].doesOccur
                ? options[FILE].value
                : "UnicodeInvariants.txt";
        System.out.println("File:\t" + invariantsFile);
        System.out.println("Ranges?\t" + mergeRanges);
        System.out.println("HTML?\t" + options[TABLE].doesOccur);
        testInvariants(invariantsFile, mergeRanges);
    }
    /**
     * A SymbolTable that consults several other SymbolTables in turn.
     * Every query is forwarded to the tables in list order and the first
     * non-null answer is returned; null is returned when all tables answer null.
     * @author Davis
     */
   static class ChainedSymbolTable implements SymbolTable {
       // TODO: add accessors?
       private List symbolTables;
       /**
        * Creates a chain over the given tables. The tables are queried in array
        * order, so the first element has the highest priority.
        * @param symbolTables the tables to consult
        */
       ChainedSymbolTable(SymbolTable[] symbolTables) {
           this.symbolTables = Arrays.asList(symbolTables);
       }
       /** Returns the first non-null result of lookup(s) among the chained tables, or null. */
       public char[] lookup(String s) {
           final int count = symbolTables.size();
           for (int i = 0; i < count; ++i) {
               char[] found = ((SymbolTable) symbolTables.get(i)).lookup(s);
               if (found != null) {
                   return found;
               }
           }
           return null;
       }
       /** Returns the first non-null result of lookupMatcher(ch) among the chained tables, or null. */
       public UnicodeMatcher lookupMatcher(int ch) {
           final int count = symbolTables.size();
           for (int i = 0; i < count; ++i) {
               UnicodeMatcher found = ((SymbolTable) symbolTables.get(i)).lookupMatcher(ch);
               if (found != null) {
                   return found;
               }
           }
           return null;
       }
       /**
        * Returns the first non-null result of parseReference among the chained tables, or null.
        * Warning: this relies on each table leaving pos untouched unless it returns a string,
        * because the same pos is handed to the next table after a null answer.
        */
       public String parseReference(String text, ParsePosition pos, int limit) {
           final int count = symbolTables.size();
           for (int i = 0; i < count; ++i) {
               String found = ((SymbolTable) symbolTables.get(i)).parseReference(text, pos, limit);
               if (found != null) {
                   return found;
               }
           }
           return null;
       }
   }
   /**
    * The characters accepted as the relation between the two sets of an invariant line.
    * U+2261 is included because the relation switch in testInvariants treats it as a
    * synonym of '='; without it that relation was always rejected as invalid.
    * NOTE(review): '~' is accepted here but has no case in that switch - confirm its intended meaning.
    */
   static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\~ \\= \\! \\? \\< \\> \u2261 \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");
   /**
    * Reads an invariants file line by line, evaluates every invariant, and writes a report
    * to "UnicodeInvariantResults.txt" in UCD_Types.GEN_DIR. The parse-error and
    * test-failure counts are written to the report and to standard output.
    * <p>Each input line is echoed to the report. Text after '#' is a comment, blank lines
    * are skipped, and a line consisting of "Stop" (any case) ends processing. After variable
    * substitution a line is one of:
    * <ul>
    * <li><code>Let name = value</code> - defines a variable replaced in later lines;</li>
    * <li><code>Show [Each] set</code> - lists the set ("Each" turns range merging off for that listing);</li>
    * <li><code>[Test] left relation right</code> - an invariant, where relation is one of
    *     INVARIANT_RELATIONS: = (equal), &lt; or U+2282 (proper subset), &gt; or U+2283 (proper superset),
    *     U+2264 or U+2286 (subset), U+2265 or U+2287 (superset), ! (disjoint),
    *     ? (overlapping, neither containing the other).</li>
    * </ul>
    * Malformed lines (bad set syntax, missing or unsupported relation, trailing text,
    * a Let without '=') are reported as parse errors and processing continues.
    * Both streams are closed even when processing fails.
    * @param outputFile despite its name, the invariants file to read, resolved against "com/ibm/text/UCD/"
    * @param doRange whether the listers merge ranges
    * @throws IOException if reading the invariants file fails
    */
   public static void testInvariants(String outputFile, boolean doRange) throws IOException {
       String[][] variables = new String[100][2];
       int variableCount = 0;
       int parseErrorCount = 0;
       int testFailureCount = 0;
       PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
       BufferedReader in = null;
       try {
           out.write('\uFEFF'); // BOM
           in = BagFormatter.openUTF8Reader("com/ibm/text/UCD/", outputFile);
           BagFormatter errorLister = new BagFormatter();
           errorLister.setMergeRanges(doRange);
           errorLister.setUnicodePropertyFactory(ToolUnicodePropertySource.make(""));
           errorLister.setShowLiteral(TransliteratorUtilities.toXML);
           if (options[TABLE].doesOccur) errorLister.setTabber(new Tabber.HTMLTabber());
           BagFormatter showLister = new BagFormatter();
           showLister.setUnicodePropertyFactory(ToolUnicodePropertySource.make(""));
           showLister.setMergeRanges(doRange);
           showLister.setShowLiteral(TransliteratorUtilities.toXML);
           if (options[TABLE].doesOccur) showLister.setTabber(new Tabber.HTMLTabber());
           ChainedSymbolTable st = new ChainedSymbolTable(new SymbolTable[] {
               ToolUnicodePropertySource.make(UCD.lastVersion).getSymbolTable("\u00D7"),
               ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
           ParsePosition pp = new ParsePosition(0);
           while (true) {
               String line = in.readLine();
               if (line == null) break;
               if (line.startsWith("\uFEFF")) line = line.substring(1);
               out.println(line);
               line = line.trim();
               int pos = line.indexOf('#');
               if (pos >= 0) line = line.substring(0,pos).trim();
               if (line.length() == 0) continue;
               if (line.equalsIgnoreCase("Stop")) break;
               // fix all the variables
               line = Utility.replace(line, variables, variableCount);
               // detect variable definitions
               if (line.startsWith("Let")) {
                   int x = line.indexOf('=');
                   if (x < 0) {
                       // without '=' the substring calls below would throw and abort the whole run
                       reportParseError(out, line, "Missing '=' in variable definition");
                       parseErrorCount++;
                       continue;
                   }
                   if (variableCount == variables.length) {
                       // table is full: double it instead of overflowing the fixed-size array
                       String[][] grown = new String[variables.length * 2][2];
                       System.arraycopy(variables, 0, grown, 0, variables.length);
                       variables = grown;
                   }
                   variables[variableCount][0] = line.substring(3,x).trim();
                   variables[variableCount][1] = line.substring(x+1).trim();
                   variableCount++;
                   continue;
               }
               // detect requests to list a set
               if (line.startsWith("Show")) {
                   String part = line.substring(4).trim();
                   if (part.startsWith("Each")) {
                       part = part.substring(4).trim();
                       showLister.setMergeRanges(false);
                   }
                   UnicodeSet shownSet = null;
                   try {
                       pp.setIndex(0);
                       shownSet = new UnicodeSet(part, pp, st);
                   } catch (IllegalArgumentException e) {
                       reportParseError(out, line, e.getMessage());
                       parseErrorCount++;
                   }
                   if (shownSet != null) showLister.showSetNames(out, shownSet);
                   // restore the default, which "Each" may have switched off
                   showLister.setMergeRanges(doRange);
                   continue;
               }
               if (line.startsWith("Test")) {
                   line = line.substring(4).trim();
               }
               char relation = 0;
               String rightSide = null;
               String leftSide = null;
               UnicodeSet leftSet = null;
               UnicodeSet rightSet = null;
               try {
                   pp.setIndex(0);
                   leftSet = new UnicodeSet(line, pp, st);
                   leftSide = line.substring(0,pp.getIndex());
                   eatWhitespace(line, pp);
                   if (pp.getIndex() >= line.length()) {
                       // nothing follows the left set; charAt would throw an unchecked exception
                       throw new ParseException("Missing relation, must be one of " + INVARIANT_RELATIONS.toPattern(false),
                           pp.getIndex());
                   }
                   relation = line.charAt(pp.getIndex());
                   if (!INVARIANT_RELATIONS.contains(relation)) {
                       throw new ParseException("Invalid relation, must be one of " + INVARIANT_RELATIONS.toPattern(false),
                           pp.getIndex());
                   }
                   pp.setIndex(pp.getIndex()+1); // skip char
                   eatWhitespace(line, pp);
                   int start = pp.getIndex();
                   rightSet = new UnicodeSet(line, pp, st);
                   rightSide = line.substring(start,pp.getIndex());
                   eatWhitespace(line, pp);
                   if (line.length() != pp.getIndex()) {
                       throw new ParseException("Extra characters at end", pp.getIndex());
                   }
               } catch (ParseException e) {
                   // mark the error position inside the echoed line with <@>
                   reportParseError(out, line.substring(0,e.getErrorOffset())
                       + "<@>" + line.substring(e.getErrorOffset()), e.getMessage());
                   parseErrorCount++;
                   continue;
               } catch (IllegalArgumentException e) {
                   reportParseError(out, line, e.getMessage());
                   parseErrorCount++;
                   continue;
               }
               boolean ok = true;
               switch(relation) {
                   case '=': case '\u2261': ok = leftSet.equals(rightSet); break;
                   case '<': case '\u2282': ok = rightSet.containsAll(leftSet) && !leftSet.equals(rightSet); break;
                   case '>': case '\u2283': ok = leftSet.containsAll(rightSet) && !leftSet.equals(rightSet); break;
                   case '\u2264': case '\u2286': ok = rightSet.containsAll(leftSet); break;
                   case '\u2265': case '\u2287': ok = leftSet.containsAll(rightSet); break;
                   case '!': ok = leftSet.containsNone(rightSet); break;
                   case '?': ok = !leftSet.equals(rightSet)
                           && !leftSet.containsAll(rightSet)
                           && !rightSet.containsAll(leftSet)
                           && !leftSet.containsNone(rightSet);
                       break;
                   default:
                       // a relation accepted by INVARIANT_RELATIONS but without defined semantics
                       // (e.g. '~'): report it instead of aborting the run
                       reportParseError(out, line, "Unsupported relation: " + relation);
                       parseErrorCount++;
                       continue;
               }
               if (ok) continue;
               out.println();
               out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH));
               out.println("**** START Error Info ****");
               errorLister.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet);
               out.println("**** END Error Info ****");
               out.println();
               testFailureCount++;
           }
           out.println();
           out.println("**** SUMMARY ****");
           out.println();
           out.println("ParseErrorCount=" + parseErrorCount);
           out.println("TestFailureCount=" + testFailureCount);
       } finally {
           // PrintWriter.close() does not throw, so close it first
           out.close();
           if (in != null) in.close();
       }
       System.out.println("ParseErrorCount=" + parseErrorCount);
       System.out.println("TestFailureCount=" + testFailureCount);
   }
   /** Writes one parse-error entry (the offending line followed by the message) to the report. */
   private static void reportParseError(PrintWriter out, String shownLine, String message) {
       out.println("PARSE ERROR:\t" + shownLine);
       out.println();
       out.println("**** START Error Info ****");
       out.println(message);
       out.println("**** END Error Info ****");
       out.println();
   }
   /**
    * Advances a parse position past any run of whitespace.
    * Starting at pp's index, code points are skipped for as long as
    * UCharacter.isUWhiteSpace accepts them; pp is then set to the index of the
    * first code point that is not whitespace, or to the end of the line.
    * @param line the text being parsed
    * @param pp the position to advance; updated in place
    */
   private static void eatWhitespace(String line, ParsePosition pp) {
       final int end = line.length();
       int index = pp.getIndex();
       while (index < end) {
           int codePoint = UTF16.charAt(line, index);
           if (!com.ibm.icu.lang.UCharacter.isUWhiteSpace(codePoint)) {
               break;
           }
           // step by one or two chars, depending on whether this is a supplementary code point
           index += UTF16.getCharCount(codePoint);
       }
       pp.setIndex(index);
   }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java
@ -1,780 +0,0 @@
 package com.ibm.text.UCD;
 import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.TreeSet;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.text.utility.Utility;
 public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
  // When true, the constructor prints progress messages while registering properties.
  static final boolean DEBUG = false;
  // The UCD instance this factory reads from; set in the constructor via UCD.make(version).
  private UCD ucd;
  // Normalizers for the four normalization forms, created in the constructor for ucd's version.
  private Normalizer nfc, nfd, nfkd, nfkc;
  // NOTE(review): needAgeCache and ucdCache are not referenced in the part of the class reviewed here;
  // presumably they back an Age-related cache elsewhere - confirm.
  private static boolean needAgeCache = true;
  private static UCD[] ucdCache = new UCD[UCD_Types.LIMIT_AGE];
  // Instances already built by make(), keyed by the version string that was passed to make().
  private static HashMap factoryCache = new HashMap();
  /**
   * Returns the factory for the given version string, creating and caching it on first use.
   * The cache is keyed by the string exactly as passed in.
   * @param version version string handed to the constructor
   * @return the cached or newly created factory
   */
  public static synchronized ToolUnicodePropertySource make(String version) {
    ToolUnicodePropertySource cached = (ToolUnicodePropertySource) factoryCache.get(version);
    if (cached == null) {
      cached = new ToolUnicodePropertySource(version);
      factoryCache.put(version, cached);
    }
    return cached;
  }
  /**
   * Builds the property factory for one version of the UCD. Private: instances are obtained through make(String).
   * Properties are registered with add(...) in this order:
   * (1) hand-written special cases (Name, Block, Bidi_Mirroring_Glyph, Case_Folding, Numeric_Value,
   *     FC_NFKC_Closure and the four quick-check properties);
   * (2) the standard derived properties from DerivedProperty;
   * (3) the remaining properties listed by UnifiedProperty.getAvailablePropertiesAliases;
   * (4) when ucd.getCompositeVersion() >= 0x040000, the Grapheme_Cluster_Break, Word_Break and
   *     Sentence_Break maps, which are computed from properties registered earlier.
   * In steps (2) and (3) a property is skipped when getProperty(name) already returns one, so the
   * registration order decides which implementation is used.
   * @param version version string handed to UCD.make; afterwards it is replaced by ucd.getVersion()
   *        and that value is passed to the setMain calls
   */
  private ToolUnicodePropertySource(String version) {
    ucd = UCD.make(version);
    nfc = new Normalizer(Normalizer.NFC, ucd.getVersion());
    nfd = new Normalizer(Normalizer.NFD, ucd.getVersion());
    nfkc = new Normalizer(Normalizer.NFKC, ucd.getVersion());
    nfkd = new Normalizer(Normalizer.NFKD, ucd.getVersion());
    version = ucd.getVersion(); // regularize
    // first the special cases
    if (DEBUG)
      System.out.println("Adding Simple Cases");
    // Name: null for code points whose category bit is in ODD_BALLS, otherwise ucd.getName
    add(new UnicodeProperty.SimpleProperty() {
      public String _getValue(int codepoint) {
        if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0)
          return null;
        return ucd.getName(codepoint);
      }
    }.setValues("<string>").setMain("Name", "na", UnicodeProperty.MISC, version));
    // Block: values from ucd.getBlock; the whole map is exposed directly as ucd.blockData
    add(new UnicodeProperty.SimpleProperty() {
      public String _getValue(int codepoint) {
        if (DEBUG && codepoint == 0x1D100) {
          System.out.println("here");
        }
        //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
        return ucd.getBlock(codepoint);
      }
      protected UnicodeMap _getUnicodeMap() {
        return ucd.blockData;
      }
    }.setValues(ucd.getBlockNames(null)).setMain("Block", "blk", UnicodeProperty.CATALOG, version));
    add(new UnicodeProperty.SimpleProperty() {
      public String _getValue(int codepoint) {
        //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
        return ucd.getBidiMirror(codepoint);
      }
    }.setValues("<string>").setMain("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, version));
    add(new UnicodeProperty.SimpleProperty() {
      public String _getValue(int codepoint) {
        //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
        return ucd.getCase(codepoint, UCD_Types.FULL, UCD_Types.FOLD);
      }
    }.setValues("<string>").setMain("Case_Folding", "cf", UnicodeProperty.STRING, version));
    // Numeric_Value: formatted without grouping, 1 to 8 fraction digits; null when the value is NaN
    add(new UnicodeProperty.SimpleProperty() {
      NumberFormat nf = NumberFormat.getInstance();
      {
        nf.setGroupingUsed(false);
        nf.setMaximumFractionDigits(8);
        nf.setMinimumFractionDigits(1);
      }
      public String _getValue(int codepoint) {
        double num = ucd.getNumericValue(codepoint);
        if (Double.isNaN(num))
          return null;
        return nf.format(num);
      }
    }.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version));
    // FC_NFKC_Closure: b = NFKC(fold(cp)), c = NFKC(fold(b)); null when cp is not represented or c equals b
    add(new UnicodeProperty.SimpleProperty() {
      public String _getValue(int cp) {
        if (!ucd.isRepresented(cp))
          return null;
        String b = nfkc.normalize(ucd.getCase(cp, UCD_Types.FULL, UCD_Types.FOLD));
        String c = nfkc.normalize(ucd.getCase(b, UCD_Types.FULL, UCD_Types.FOLD));
        if (c.equals(b))
          return null;
        return c;
      }
      public int getMaxWidth(boolean isShort) {
        return 14;
      }
    }.setMain("FC_NFKC_Closure", "FC_NFKC", UnicodeProperty.STRING, version)
    //.addName("FNC")
    );
    // Quick-check properties: "No" when the code point is not normalized in the form. For the
    // composed forms (NFC, NFKC) a trailing code point yields "Maybe"; for the decomposed forms
    // (NFD, NFKD) a normalized trailing code point is treated as an internal error.
    add(new UnicodeProperty.SimpleProperty() {
      public String _getValue(int codepoint) {
        if (!nfd.isNormalized(codepoint))
          return "No";
        else if (nfd.isTrailing(codepoint))
          throw new IllegalArgumentException("Internal Error!");
        else
          return "Yes";
      }
      public int getMaxWidth(boolean isShort) {
        return 15;
      }
    }.setValues(LONG_YES_NO, YES_NO).swapFirst2ValueAliases().setMain("NFD_Quick_Check", "NFD_QC", UnicodeProperty.ENUMERATED, version));
    add(new UnicodeProperty.SimpleProperty() {
      public String _getValue(int codepoint) {
        if (!nfc.isNormalized(codepoint))
          return "No";
        else if (nfc.isTrailing(codepoint))
          return "Maybe";
        else
          return "Yes";
      }
      public int getMaxWidth(boolean isShort) {
        return 15;
      }
    }.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE).swapFirst2ValueAliases().setMain("NFC_Quick_Check", "NFC_QC", UnicodeProperty.ENUMERATED, version));
    add(new UnicodeProperty.SimpleProperty() {
      public String _getValue(int codepoint) {
        if (!nfkd.isNormalized(codepoint))
          return "No";
        else if (nfkd.isTrailing(codepoint))
          throw new IllegalArgumentException("Internal Error!");
        else
          return "Yes";
      }
      public int getMaxWidth(boolean isShort) {
        return 15;
      }
    }.setValues(LONG_YES_NO, YES_NO).swapFirst2ValueAliases().setMain("NFKD_Quick_Check", "NFKD_QC", UnicodeProperty.ENUMERATED, version));
    add(new UnicodeProperty.SimpleProperty() {
      public String _getValue(int codepoint) {
        if (!nfkc.isNormalized(codepoint))
          return "No";
        else if (nfkc.isTrailing(codepoint))
          return "Maybe";
        else
          return "Yes";
      }
      public int getMaxWidth(boolean isShort) {
        return 15;
      }
    }.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE).swapFirst2ValueAliases().setMain("NFKC_Quick_Check", "NFKC_QC", UnicodeProperty.ENUMERATED, version));
    /*
     add(new UnicodeProperty.SimpleProperty() {
     public String _getValue(int codepoint) {
     if (!nfx.isNormalized(codepoint)) return NO;
     else if (nfx.isTrailing(codepoint)) return MAYBE;
     else return "";
     }
     }.setMain("NFD_QuickCheck", "nv", UnicodeProperty.NUMERIC, version)
     .setValues("<number>"));
     */
    // Now the derived properties
    // Only non-null, standard properties whose name is not registered yet are wrapped and added.
    if (DEBUG)
      System.out.println("Derived Properties");
    for (int i = 0; i < DerivedProperty.DERIVED_PROPERTY_LIMIT; ++i) {
      UCDProperty prop = DerivedProperty.make(i);
      if (prop == null)
        continue;
      if (!prop.isStandard())
        continue;
      String name = prop.getName();
      if (getProperty(name) != null) {
        if (DEBUG)
          System.out.println("Iterated Names: " + name + ", ALREADY PRESENT*");
        continue; // skip if already there
      }
      // two properties get a fixed type; every other type goes through remapUCDType
      int type = prop.getValueType();
      if (i == UCD_Types.FC_NFKC_Closure)
        type = UnicodeProperty.STRING;
      else if (i == UCD_Types.FullCompExclusion)
        type = UnicodeProperty.BINARY;
      else
        type = remapUCDType(type);
      if (DEBUG)
        System.out.println(prop.getName());
      add(new UCDPropertyWrapper(prop, type, false));
    }
    // then the general stuff
    // Every alias reported by UnifiedProperty that is not registered yet becomes a ToolUnicodeProperty.
    if (DEBUG)
      System.out.println("Other Properties");
    List names = new ArrayList();
    UnifiedProperty.getAvailablePropertiesAliases(names, ucd);
    Iterator it = names.iterator();
    while (it.hasNext()) {
      String name = (String) it.next();
      if (getProperty(name) != null) {
        if (DEBUG)
          System.out.println("Iterated Names: " + name + ", ALREADY PRESENT");
        continue; // skip if already there
      }
      if (DEBUG)
        System.out.println("Iterated Names: " + name);
      add(new ToolUnicodeProperty(name));
    }
    // The three segmentation properties below are only added for composite version 0x040000 and later.
    // Each one fills a UnicodeMap from properties registered above; code points never put get "Other".
    int compositeVersion = ucd.getCompositeVersion();
    // Grapheme_Cluster_Break
    if (compositeVersion >= 0x040000) add(new UnicodeProperty.UnicodeMapProperty() {
      {
        unicodeMap = new UnicodeMap();
        unicodeMap.setErrorOnReset(true);
        unicodeMap.put(0xD, "CR");
        unicodeMap.put(0xA, "LF");
        UnicodeProperty cat = getProperty("General_Category");
        UnicodeSet temp = cat.getSet("Line_Separator").addAll(cat.getSet("Paragraph_Separator")).addAll(cat.getSet("Control")).addAll(cat.getSet("Format")).remove(0xD).remove(0xA).remove(0x200C)
            .remove(0x200D);
        unicodeMap.putAll(temp, "Control");
        UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
        unicodeMap.putAll(graphemeExtend, "Extend");
        UnicodeProperty hangul = getProperty("Hangul_Syllable_Type");
        unicodeMap.putAll(hangul.getSet("L"), "L");
        unicodeMap.putAll(hangul.getSet("V"), "V");
        unicodeMap.putAll(hangul.getSet("T"), "T");
        unicodeMap.putAll(hangul.getSet("LV"), "LV");
        unicodeMap.putAll(hangul.getSet("LVT"), "LVT");
        unicodeMap.setMissing("Other");
      }
    }.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version).addValueAliases(new String[][] { { "Control", "CN" }, { "Extend", "EX" }, { "Other", "XX" }, }, true)
        .swapFirst2ValueAliases());
    // Word_Break
    if (compositeVersion >= 0x040000) add(new UnicodeProperty.UnicodeMapProperty() {
      {
        unicodeMap = new UnicodeMap();
        unicodeMap.setErrorOnReset(true);
        UnicodeProperty cat = getProperty("General_Category");
        unicodeMap.putAll(cat.getSet("Format").remove(0x200C).remove(0x200D), "Format");
        UnicodeProperty script = getProperty("Script");
        unicodeMap.putAll(script.getSet("Katakana").addAll(new UnicodeSet("[\u3031\u3032\u3033\u3034\u3035\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]")), "Katakana");
        // NOTE(review): foo is not read again in this initializer
        Object foo = unicodeMap.getSet("Katakana");
        UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
        UnicodeProperty lineBreak = getProperty("Line_Break");
        unicodeMap.putAll(getProperty("Alphabetic").getSet("true").add(0x05F3).removeAll(getProperty("Ideographic").getSet("true")).removeAll(unicodeMap.getSet("Katakana"))
        //.removeAll(script.getSet("Thai"))
            //.removeAll(script.getSet("Lao"))
            .removeAll(lineBreak.getSet("SA")).removeAll(script.getSet("Hiragana")).removeAll(graphemeExtend), "ALetter");
        unicodeMap.putAll(new UnicodeSet("[\\u0027\\u00B7\\u05F4\\u2019\\u2027\\u003A]"), "MidLetter");
        unicodeMap.putAll(lineBreak.getSet("Infix_Numeric").remove(0x003A), "MidNum");
        unicodeMap.putAll(lineBreak.getSet("Numeric"), "Numeric");
        unicodeMap.putAll(cat.getSet("Connector_Punctuation").remove(0x30FB).remove(0xFF65), "ExtendNumLet");
        unicodeMap.putAll(graphemeExtend, "Other"); // to verify that none of the above touch it.
        unicodeMap.setMissing("Other");
      }
    }.setMain("Word_Break", "WB", UnicodeProperty.ENUMERATED, version).addValueAliases(
        new String[][] { { "Format", "FO" }, { "Katakana", "KA" }, { "ALetter", "LE" }, { "MidLetter", "ML" }, { "MidNum", "MN" }, { "Numeric", "NU" }, { "ExtendNumLet", "EX" }, { "Other", "XX" }, },
        true).swapFirst2ValueAliases());
    // Sentence_Break
    if (compositeVersion >= 0x040000) add(new UnicodeProperty.UnicodeMapProperty() {
      {
        unicodeMap = new UnicodeMap();
        unicodeMap.setErrorOnReset(true);
        unicodeMap.putAll(new UnicodeSet("[\\u000A\\u000D\\u0085\\u2028\\u2029]"), "Sep");
        UnicodeProperty cat = getProperty("General_Category");
        unicodeMap.putAll(cat.getSet("Format").remove(0x200C).remove(0x200D), "Format");
        unicodeMap.putAll(getProperty("Whitespace").getSet("true").removeAll(unicodeMap.getSet("Sep")).remove(0xA0), "Sp");
        UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
        unicodeMap.putAll(getProperty("Lowercase").getSet("true").removeAll(graphemeExtend), "Lower");
        unicodeMap.putAll(getProperty("Uppercase").getSet("true").addAll(cat.getSet("Titlecase_Letter")), "Upper");
        UnicodeSet temp = getProperty("Alphabetic").getSet("true").add(0xA0).add(0x5F3).removeAll(unicodeMap.getSet("Lower")).removeAll(unicodeMap.getSet("Upper")).removeAll(graphemeExtend);
        unicodeMap.putAll(temp, "OLetter");
        UnicodeProperty lineBreak = getProperty("Line_Break");
        unicodeMap.putAll(lineBreak.getSet("Numeric"), "Numeric");
        unicodeMap.put(0x002E, "ATerm");
        unicodeMap.putAll(getProperty("STerm").getSet("true").removeAll(unicodeMap.getSet("ATerm")), "STerm");
        unicodeMap.putAll(cat.getSet("Open_Punctuation").addAll(cat.getSet("Close_Punctuation")).addAll(lineBreak.getSet("Quotation")).remove(0x05F3).removeAll(unicodeMap.getSet("ATerm")).removeAll(
            unicodeMap.getSet("STerm")), "Close");
        unicodeMap.putAll(graphemeExtend, "Other"); // to verify that none of the above touch it.
        unicodeMap.setMissing("Other");
      }
    }.setMain("Sentence_Break", "SB", UnicodeProperty.ENUMERATED, version).addValueAliases(
        new String[][] { { "Sep", "SE" }, { "Format", "FO" }, { "Sp", "SP" }, { "Lower", "LO" }, { "Upper", "UP" }, { "OLetter", "LE" }, { "Numeric", "NU" }, { "ATerm", "AT" }, { "STerm", "ST" },
            { "Close", "CL" }, { "Other", "XX" }, }, false).swapFirst2ValueAliases());
  }
  // Value names handed to setValues(...) for the quick-check properties in the constructor:
  // the LONG_ arrays hold the long names and the others the short aliases, listed in the same
  // order (presumably paired by index - confirm against UnicodeProperty.setValues).
  static String[] YES_NO_MAYBE = { "N", "M", "Y" };
  static String[] LONG_YES_NO_MAYBE = { "No", "Maybe", "Yes" };
  static String[] YES_NO = { "N", "Y" };
  static String[] LONG_YES_NO = { "No", "Yes" };
  /*
   "Bidi_Mirroring_Glyph", "Block", "Case_Folding", "Case_Sensitive", "ISO_Comment",
   "Lowercase_Mapping", "Name", "Numeric_Value", "Simple_Case_Folding", 
   "Simple_Lowercase_Mapping", "Simple_Titlecase_Mapping", "Simple_Uppercase_Mapping", 
   "Titlecase_Mapping", "Unicode_1_Name", "Uppercase_Mapping", "isCased", "isCasefolded", 
   "isLowercase", "isNFC", "isNFD", "isNFKC", "isNFKD", "isTitlecase", "isUppercase",
   "toNFC", "toNFD", "toNFKC", "toNKFD"
   });
   */
  /*
   private class NameProperty extends UnicodeProperty.SimpleProperty {
   {set("Name", "na", "<string>", UnicodeProperty.STRING);}
   public String getPropertyValue(int codepoint) {
   if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
   return ucd.getName(codepoint);
   }
   }
   */
  /**
   * Adapts a UCDProperty to the UnicodeProperty API, so that the derived properties
   * can be registered with this factory.
   */
  static class UCDPropertyWrapper extends UnicodeProperty {
    UCDProperty ucdProperty;
    // when true, the values "No", "Maybe" and "Yes" (aliases N, M, Y) are reported as well
    boolean yes_no_maybe;
    /**
     * Wraps the given property. The wrapper is named after the property's long name,
     * falling back to its short name when it has no long name.
     * @param ucdProperty the property to delegate to
     * @param type the UnicodeProperty type to report
     * @param yes_no_maybe whether the property uses the No/Maybe/Yes value set
     */
    UCDPropertyWrapper(UCDProperty ucdProperty, int type, boolean yes_no_maybe) {
      this.ucdProperty = ucdProperty;
      setType(type);
      String name = ucdProperty.getName(UCDProperty.LONG);
      if (name == null) {
        // the fallback must be assigned; otherwise setName would still receive null
        name = ucdProperty.getName(UCDProperty.SHORT);
      }
      setName(name);
      this.yes_no_maybe = yes_no_maybe;
    }
    /** Returns the version of the UCD behind the wrapped property. */
    protected String _getVersion() {
      return ucdProperty.getUCD().getVersion();
    }
    /** Returns the long value of the wrapped property, with the empty string mapped to "False". */
    protected String _getValue(int codepoint) {
      String result = ucdProperty.getValue(codepoint, UCDProperty.LONG);
      if (result.length() == 0) {
        return "False";
      }
      return result;
    }
    /** Adds the short name, the wrapper's name, and for White_Space the extra alias "space". */
    protected List _getNameAliases(List result) {
      addUnique(ucdProperty.getName(UCDProperty.SHORT), result);
      String name = getName();
      addUnique(name, result);
      if (name.equals("White_Space"))
        addUnique("space", result);
      return result;
    }
    /**
     * Adds the aliases of a value: for binary properties T/F next to True/False, and for
     * yes/no/maybe properties Y/N/M next to Yes/No/Maybe. In both cases the short alias
     * is added before the value itself.
     */
    protected List _getValueAliases(String valueAlias, List result) {
      if (isType(BINARY_MASK)) {
        if (valueAlias.equals("True"))
          addUnique("T", result);
        else if (valueAlias.equals("False"))
          addUnique("F", result);
        addUnique(valueAlias, result);
      }
      if (yes_no_maybe) {
        if (valueAlias.equals("Yes"))
          addUnique("Y", result);
        else if (valueAlias.equals("No"))
          addUnique("N", result);
        else if (valueAlias.equals("Maybe"))
          addUnique("M", result);
        addUnique(valueAlias, result);
      }
      return result;
    }
    /** Adds True/False for binary properties and No/Maybe/Yes for yes/no/maybe properties. */
    protected List _getAvailableValues(List result) {
      if (isType(BINARY_MASK)) {
        addUnique("True", result);
        addUnique("False", result);
      }
      if (yes_no_maybe) {
        addUnique("No", result);
        addUnique("Maybe", result);
        addUnique("Yes", result);
      }
      return result;
    }
  }
  // Bit mask with one bit per general category for Cn, Co, Cs and Cc. It is tested against
  // ucd.getCategoryMask(codepoint); the Name property returns null for code points that match.
  static final int ODD_BALLS = (1 << UCD_Types.Cn) | (1 << UCD_Types.Co) | (1 << UCD_Types.Cs) | (1 << UCD.Cc);
  /* (non-Javadoc)
   * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyAliases(java.util.Collection)
   */
  private class ToolUnicodeProperty extends UnicodeProperty {
    // The underlying property, obtained in the constructor from UnifiedProperty.make(propMask, ucd).
    com.ibm.text.UCD.UCDProperty up;
    // Mask returned by UnifiedProperty.getPropmask for the alias; propMask >> 8 selects the
    // property in the switch statements of this class.
    int propMask;
    // NOTE(review): not referenced in the part of the class reviewed here - confirm whether it is still used.
    static final int EXTRA_START = 0x10000;
    /**
     * Creates the property for the given alias, looking it up in the enclosing factory's UCD.
     * The type comes from getPropertyTypeInternal() and the name is the alias itself.
     * @param propertyAlias alias understood by UnifiedProperty.getPropmask
     * @throws IllegalArgumentException if UnifiedProperty.make returns null for the alias
     */
    private ToolUnicodeProperty(String propertyAlias) {
      propMask = UnifiedProperty.getPropmask(propertyAlias, ucd);
      up = UnifiedProperty.make(propMask, ucd);
      if (up == null)
        throw new IllegalArgumentException("Not found: " + propertyAlias);
      // Diagnostic trace for one specific property; only emitted in DEBUG mode, like the other
      // diagnostics of the factory. Note that it reports the type before setType is called.
      if (ToolUnicodePropertySource.DEBUG && propertyAlias.equals("Case_Fold_Turkish_I")) {
        System.out.println(propertyAlias + " " + getTypeName(getType()));
      }
      setType(getPropertyTypeInternal());
      setName(propertyAlias);
    }
    /**
     * Appends the values this property can take to result.
     * String and miscellaneous properties report the placeholder "&lt;string&gt;", numeric ones
     * "&lt;number&gt;", binary ones "True" and "False". For enumerated and catalog properties the
     * value names are collected by probing the indexes 0..255 of the matching UCD name table.
     * @param result list to append to; a new list is created when null
     * @return the list that received the values
     * @throws IllegalArgumentException if an enumerated or catalog property has no name table here
     */
    public List _getAvailableValues(List result) {
      if (result == null) {
        result = new ArrayList();
      }
      final int coreType = getType() & CORE_MASK;
      if (coreType == STRING || coreType == MISC) {
        result.add("<string>");
        return result;
      }
      if (coreType == NUMERIC) {
        result.add("<number>");
        return result;
      }
      if (coreType == BINARY) {
        result.add("True");
        result.add("False");
        return result;
      }
      if (coreType != ENUMERATED && coreType != CATALOG) {
        return result;
      }
      final byte style = UCD_Types.LONG;
      final int prop = propMask >> 8;
      boolean titlecase = false;
      for (int index = 0; index < 256; ++index) {
        String valueName = null;
        try {
          switch (prop) {
            case UCD_Types.CATEGORY >> 8:
              valueName = ucd.getCategoryID_fromIndex((byte) index, style);
              break;
            case UCD_Types.COMBINING_CLASS >> 8:
              valueName = ucd.getCombiningClassID_fromIndex((short) index, style);
              break;
            case UCD_Types.BIDI_CLASS >> 8:
              valueName = ucd.getBidiClassID_fromIndex((byte) index, style);
              break;
            case UCD_Types.DECOMPOSITION_TYPE >> 8:
              valueName = ucd.getDecompositionTypeID_fromIndex((byte) index, style);
              break;
            case UCD_Types.NUMERIC_TYPE >> 8:
              valueName = ucd.getNumericTypeID_fromIndex((byte) index, style);
              titlecase = true;
              break;
            case UCD_Types.EAST_ASIAN_WIDTH >> 8:
              valueName = ucd.getEastAsianWidthID_fromIndex((byte) index, style);
              break;
            case UCD_Types.LINE_BREAK >> 8:
              valueName = ucd.getLineBreakID_fromIndex((byte) index, style);
              break;
            case UCD_Types.JOINING_TYPE >> 8:
              valueName = ucd.getJoiningTypeID_fromIndex((byte) index, style);
              break;
            case UCD_Types.JOINING_GROUP >> 8:
              valueName = ucd.getJoiningGroupID_fromIndex((byte) index, style);
              break;
            case UCD_Types.SCRIPT >> 8:
              valueName = ucd.getScriptID_fromIndex((byte) index, style);
              titlecase = true;
              if (UnicodeProperty.UNUSED.equals(valueName)) {
                continue;
              }
              if (valueName != null) {
                valueName = UCharacter.toTitleCase(Locale.ENGLISH, valueName, null);
              }
              break;
            case UCD_Types.AGE >> 8:
              valueName = ucd.getAgeID_fromIndex((byte) index, style);
              break;
            case UCD_Types.HANGUL_SYLLABLE_TYPE >> 8:
              valueName = ucd.getHangulSyllableTypeID_fromIndex((byte) index, style);
              break;
            default:
              throw new IllegalArgumentException("Internal Error: " + prop);
          }
        } catch (ArrayIndexOutOfBoundsException e) {
          // the probed index lies outside this property's name table
          continue;
        }
        if (valueName == null || valueName.length() == 0 || valueName.equals(UNUSED)) {
          continue;
        }
        result.add(Utility.getUnskeleton(valueName, titlecase));
      }
      //if (prop == (UCD_Types.DECOMPOSITION_TYPE>>8)) result.add("none");
      //if (prop == (UCD_Types.JOINING_TYPE>>8)) result.add("Non_Joining");
      //if (prop == (UCD_Types.NUMERIC_TYPE>>8)) result.add("None");
      return result;
    }
    /**
     * Appends the names of this property to result: first the short name, then the long
     * name, both passed through Utility.getUnskeleton, and for White_Space the extra
     * alias "space".
     * @param result list to append to; a new list is created when null
     * @return the list that received the names
     */
    public List _getNameAliases(List result) {
      final List aliases = (result != null) ? result : new ArrayList();
      final String shortAlias = Utility.getUnskeleton(up.getName(UCD_Types.SHORT), false);
      addUnique(shortAlias, aliases);
      final String fullName = up.getName(UCD_Types.LONG);
      final String longAlias = Utility.getUnskeleton(fullName, true);
      addUnique(longAlias, aliases);
      // hack
      if (fullName.equals("White_Space")) {
        addUnique("space", aliases);
      }
      return aliases;
    }
    /**
     * Collects the aliases of one property value into a list.
     *
     * The handling depends on the core property type (getType() & CORE_MASK):
     * STRING, MISC and NUMERIC values have only themselves as alias; BINARY values
     * are resolved through the UCD_Names Y/N tables; ENUMERATED and CATALOG values
     * are resolved through the name tables of the property group (propMask >> 8).
     *
     * @param valueAlias the value name to find aliases for
     * @param result list to append to; a new ArrayList is created when null
     * @return the list that received the aliases
     * @throws IllegalArgumentException for an ENUMERATED/CATALOG property group
     *         that has no case in the switch
     * @throws ArrayIndexOutOfBoundsException ("not supported yet") when the core
     *         type is none of the handled ones, or when every pass of the loop
     *         below ended in an ArrayIndexOutOfBoundsException
     */
    public List _getValueAliases(String valueAlias, List result) {
      if (result == null)
        result = new ArrayList();
      int type = getType() & CORE_MASK;
      if (type == STRING || type == MISC || type == NUMERIC) {
        // Free-form values: the only alias is the value itself.
        UnicodeProperty.addUnique(valueAlias, result);
        return result;
      } else if (type == BINARY) {
        // The value itself, plus whatever lookup() adds from the Y/N name tables.
        UnicodeProperty.addUnique(valueAlias, result);
        return lookup(valueAlias, UCD_Names.YN_TABLE_LONG, UCD_Names.YN_TABLE, null, result);
      } else if (type == ENUMERATED || type == CATALOG) {
        // NOTE(review): style, titlecase and the loop index i are never read in
        // this method; the loop shape presumably was copied from the
        // available-values code -- confirm before simplifying.
        byte style = UCD_Types.LONG;
        int prop = propMask >> 8;
        boolean titlecase = false;
        // Every switch arm returns or throws, so the body normally runs once.
        // Only an ArrayIndexOutOfBoundsException makes it go round again, and
        // then the very same lookup is repeated (up to 256 times in total).
        for (int i = 0; i < 256; ++i) {
          try {
            switch (prop) {
              case UCD_Types.CATEGORY >> 8:
                return lookup(valueAlias, UCD_Names.LONG_GENERAL_CATEGORY, UCD_Names.GENERAL_CATEGORY, UCD_Names.EXTRA_GENERAL_CATEGORY, result);
              case UCD_Types.COMBINING_CLASS >> 8:
                // Also add the decimal form of the low 8 bits of the value that
                // Utility.lookup returns for the long combining-class name.
                addUnique(String.valueOf(0xFF & Utility.lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, true)), result);
                return lookup(valueAlias, UCD_Names.LONG_COMBINING_CLASS, UCD_Names.COMBINING_CLASS, null, result);
              case UCD_Types.BIDI_CLASS >> 8:
                return lookup(valueAlias, UCD_Names.LONG_BIDI_CLASS, UCD_Names.BIDI_CLASS, null, result);
              case UCD_Types.DECOMPOSITION_TYPE >> 8:
                return lookup(valueAlias, UCD_Names.LONG_DECOMPOSITION_TYPE, UCD_Names.DECOMPOSITION_TYPE, null, result);
              case UCD_Types.NUMERIC_TYPE >> 8:
                return lookup(valueAlias, UCD_Names.LONG_NUMERIC_TYPE, UCD_Names.NUMERIC_TYPE, null, result);
              case UCD_Types.EAST_ASIAN_WIDTH >> 8:
                return lookup(valueAlias, UCD_Names.LONG_EAST_ASIAN_WIDTH, UCD_Names.EAST_ASIAN_WIDTH, null, result);
              case UCD_Types.LINE_BREAK >> 8:
                lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, null, result);
                // The misspelling "Inseperable" is added on purpose as an extra
                // alias (presumably a legacy spelling -- TODO confirm).
                if (valueAlias.equals("Inseparable"))
                  addUnique("Inseperable", result);
                // Inseparable; Inseperable
                return result;
              case UCD_Types.JOINING_TYPE >> 8:
                return lookup(valueAlias, UCD_Names.LONG_JOINING_TYPE, UCD_Names.JOINING_TYPE, null, result);
              case UCD_Types.JOINING_GROUP >> 8:
                // No second table is passed: only the value itself is added.
                return lookup(valueAlias, UCD_Names.JOINING_GROUP, null, null, result);
              case UCD_Types.SCRIPT >> 8:
                return lookup(valueAlias, UCD_Names.LONG_SCRIPT, UCD_Names.SCRIPT, UCD_Names.EXTRA_SCRIPT, result);
              case UCD_Types.AGE >> 8:
                // No second table is passed: only the value itself is added.
                return lookup(valueAlias, UCD_Names.AGE, null, null, result);
              case UCD_Types.HANGUL_SYLLABLE_TYPE >> 8:
                return lookup(valueAlias, UCD_Names.LONG_HANGUL_SYLLABLE_TYPE, UCD_Names.HANGUL_SYLLABLE_TYPE, null, result);
              default:
                // Not caught by the handler below, so this leaves the method.
                throw new IllegalArgumentException("Internal Error: " + prop);
            }
          } catch (ArrayIndexOutOfBoundsException e) {
            // Deliberately ignored: retry with the next i.
            continue;
          }
        }
      }
      // Reached for an unhandled core type, or after 256 failed passes above.
      throw new ArrayIndexOutOfBoundsException("not supported yet");
    }
    /**
     * Returns the long-form value name of this property for one code point.
     *
     * The property group (propMask >> 8) selects which UCD accessor supplies the
     * name. Empty decomposition-type, numeric-type and joining-type names are
     * replaced by "none", "None" and "Non_Joining"; script names are title-cased
     * with the English locale. The name is passed through Utility.getUnskeleton
     * before it is returned. Groups without a case here yield "True"/"False"
     * from the wrapped property when this is a binary property.
     *
     * @param codepoint the code point to look up
     * @return the value name
     * @throws IllegalArgumentException when no name was found and the property
     *         is not binary
     */
    public String _getValue(int codepoint) {
      final byte idStyle = UCD_Types.LONG;
      String value = null;
      boolean titleCased = false;
      switch (propMask >> 8) {
        case UCD_Types.CATEGORY >> 8:
          value = ucd.getCategoryID_fromIndex(ucd.getCategory(codepoint), idStyle);
          break;
        case UCD_Types.COMBINING_CLASS >> 8:
          value = ucd.getCombiningClassID_fromIndex(ucd.getCombiningClass(codepoint), idStyle);
          break;
        case UCD_Types.BIDI_CLASS >> 8:
          value = ucd.getBidiClassID_fromIndex(ucd.getBidiClass(codepoint), idStyle);
          break;
        case UCD_Types.DECOMPOSITION_TYPE >> 8:
          value = ucd.getDecompositionTypeID_fromIndex(ucd.getDecompositionType(codepoint), idStyle);
          value = (value == null || value.length() == 0) ? "none" : value;
          break;
        case UCD_Types.NUMERIC_TYPE >> 8:
          titleCased = true;
          value = ucd.getNumericTypeID_fromIndex(ucd.getNumericType(codepoint), idStyle);
          value = (value == null || value.length() == 0) ? "None" : value;
          break;
        case UCD_Types.EAST_ASIAN_WIDTH >> 8:
          value = ucd.getEastAsianWidthID_fromIndex(ucd.getEastAsianWidth(codepoint), idStyle);
          break;
        case UCD_Types.LINE_BREAK >> 8:
          value = ucd.getLineBreakID_fromIndex(ucd.getLineBreak(codepoint), idStyle);
          break;
        case UCD_Types.JOINING_TYPE >> 8:
          value = ucd.getJoiningTypeID_fromIndex(ucd.getJoiningType(codepoint), idStyle);
          value = (value == null || value.length() == 0) ? "Non_Joining" : value;
          break;
        case UCD_Types.JOINING_GROUP >> 8:
          value = ucd.getJoiningGroupID_fromIndex(ucd.getJoiningGroup(codepoint), idStyle);
          break;
        case UCD_Types.SCRIPT >> 8:
          titleCased = true;
          value = ucd.getScriptID_fromIndex(ucd.getScript(codepoint), idStyle);
          if (value != null) {
            value = UCharacter.toTitleCase(Locale.ENGLISH, value, null);
          }
          break;
        case UCD_Types.AGE >> 8:
          value = getAge(codepoint);
          break;
        case UCD_Types.HANGUL_SYLLABLE_TYPE >> 8:
          value = ucd.getHangulSyllableTypeID_fromIndex(ucd.getHangulSyllableType(codepoint), idStyle);
          break;
      }
      if (value == null) {
        // No name from the tables: only a binary property can still answer.
        if (isType(BINARY_MASK)) {
          return up.hasValue(codepoint) ? "True" : "False";
        }
        throw new IllegalArgumentException("Failed to find value for " + Utility.hex(codepoint));
      }
      return Utility.getUnskeleton(value, titleCased);
    }
    /**
     * Returns the Age value name for a code point.
     *
     * On first use (while needAgeCache is set) a UCD is created through UCD.make
     * for every entry of UCD_Names.AGE_VERSIONS from index UCD_Types.AGE11 up to,
     * but not including, UCD_Types.LIMIT_AGE, and stored in ucdCache. The cached
     * UCDs are then scanned in index order, and the name of the first one that
     * reports the code point as allocated is returned.
     *
     * @param codePoint the code point to look up
     * @return UCD_Names.AGE[i] for the first index i whose UCD has the code point
     *         allocated, or UCD_Names.AGE[UCD_Types.UNKNOWN] when none has
     */
    public String getAge(int codePoint) {
      // The former debugging hook that printed "debug point" for U+F0000 has been
      // removed: it wrote stray output from what is otherwise a pure lookup.
      if (needAgeCache) {
        for (int i = UCD_Types.AGE11; i < UCD_Types.LIMIT_AGE; ++i) {
          ucdCache[i] = UCD.make(UCD_Names.AGE_VERSIONS[i]);
        }
        needAgeCache = false;
      }
      for (int i = UCD_Types.AGE11; i < UCD_Types.LIMIT_AGE; ++i) {
        if (ucdCache[i].isAllocated(codePoint)) {
          return UCD_Names.AGE[i];
        }
      }
      return UCD_Names.AGE[UCD_Types.UNKNOWN];
    }
    /**
     * Computes the UnicodeProperty type constant for the wrapped property.
     *
     * The CaseFoldTurkishI and Non_break binary properties are reported as
     * EXTENDED_BINARY, and the SCRIPT and AGE groups as CATALOG. Every other
     * property gets its UCD value type translated by remapUCDType, with
     * EXTENDED_MASK or-ed in when the wrapped property is not standard.
     *
     * @see com.ibm.icu.dev.test.util.UnicodePropertySource#getPropertyType()
     */
    private int getPropertyTypeInternal() {
      if (propMask == (UCD_Types.BINARY_PROPERTIES | UCD_Types.CaseFoldTurkishI)
          || propMask == (UCD_Types.BINARY_PROPERTIES | UCD_Types.Non_break)) {
        return EXTENDED_BINARY;
      }
      final int group = propMask >> 8;
      if (group == (UCD_Types.SCRIPT >> 8) || group == (UCD_Types.AGE >> 8)) {
        return CATALOG;
      }
      final int extendedBits = up.isStandard() ? 0 : EXTENDED_MASK;
      return remapUCDType(up.getValueType()) | extendedBits;
    }
    /**
     * Returns the version string reported by the UCD of the wrapped property.
     */
    public String _getVersion() {
      final String ucdVersion = up.ucd.getVersion();
      return ucdVersion;
    }
  }
  /**
   * Translates a UCD_Types value-type constant into the matching
   * UnicodeProperty type constant.
   *
   * NUMERIC_PROP maps to NUMERIC and BINARY_PROP to BINARY; CATALOG_PROP,
   * FLATTENED_BINARY_PROP and ENUMERATED_PROP all map to ENUMERATED; everything
   * else (STRING_PROP, MISC_PROP, UNKNOWN_PROP and any unlisted value) maps to
   * STRING.
   *
   * @param result the UCD_Types value-type constant
   * @return the corresponding UnicodeProperty type constant
   */
  private int remapUCDType(int result) {
    switch (result) {
      case UCD_Types.NUMERIC_PROP:
        return UnicodeProperty.NUMERIC;
      case UCD_Types.BINARY_PROP:
        return UnicodeProperty.BINARY;
      case UCD_Types.CATALOG_PROP:
      case UCD_Types.FLATTENED_BINARY_PROP:
      case UCD_Types.ENUMERATED_PROP:
        return UnicodeProperty.ENUMERATED;
      case UCD_Types.STRING_PROP:
      case UCD_Types.MISC_PROP:
      case UCD_Types.UNKNOWN_PROP:
      default:
        // Unknown types are treated as strings rather than rejected.
        return UnicodeProperty.STRING;
    }
  }
  /**
   * Adds a value name and its alternative names to a list.
   *
   * When aux is given, valueAlias is located in main through Utility.lookup and
   * the entry of aux at that position (low 8 bits of the returned value) is
   * added first. valueAlias itself is added next, and finally the entry that
   * aux2 holds for valueAlias, if there is one.
   *
   * @param valueAlias the value name to resolve
   * @param main table searched for valueAlias; only consulted when aux is given
   * @param aux parallel table of alternative names, or null
   * @param aux2 map from value name to one extra alias, or null
   * @param result list to append to
   * @return result
   */
  static List lookup(String valueAlias, String[] main, String[] aux, Map aux2, List result) {
    if (aux != null) {
      final int index = Utility.lookup(valueAlias, main, true) & 0xFF;
      UnicodeProperty.addUnique(aux[index], result);
    }
    UnicodeProperty.addUnique(valueAlias, result);
    final Object extra = (aux2 == null) ? null : aux2.get(valueAlias);
    if (extra != null) {
      UnicodeProperty.addUnique((String) extra, result);
    }
    return result;
  }
  /*
   static class DerivedPropertyWrapper extends UnicodeProperty {
   UCDProperty derivedProperty;
   UCD ucd;
   DerivedPropertyWrapper(int derivedPropertyID, UCD ucd) {
   this.ucd = ucd;
   derivedProperty = DerivedProperty.make(derivedPropertyID, ucd);
   }
   protected String _getVersion() {
   return ucd.getVersion();
   }
   protected String _getValue(int codepoint) {
   return derivedProperty.getValue(codepoint, UCD_Types.LONG);
   }
   protected List _getNameAliases(List result) {
   if (result != null) result = new ArrayList(1);
   addUnique(derivedProperty.getName(UCD_Types.SHORT), result);
   addUnique(derivedProperty.getName(UCD_Types.LONG), result);
   return null;
   }
   protected List _getValueAliases(String valueAlias, List result) {
   // TODO Auto-generated method stub
   return null;
   }
   protected List _getAvailableValues(List result) {
   // TODO Auto-generated method stub
   return null;
   }
   }
   */
 }
--- a/tools/unicodetools/com/ibm/text/UCD/UCD-in-XML-Notes.htm
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD-in-XML-Notes.htm
@ -1,226 +0,0 @@
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
 <meta name="GENERATOR" content="Microsoft FrontPage 4.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <title>Unicode Character Database</title>
 <style>
 <!--
 table        { padding: 4 }
 td           { padding: 4 }
 -->
 </style>
 </head>
 <body>
 <span class="cb" id style="DISPLAY: block">
 <h1 align="center">Unicode Character Database (UCD) in XML Format</h1>
 <h1 align="center"><b><font color="#FF0000">WARNING: FORMAT IS DRAFT!</font></b></h1>
 <p align="center">MD 2000.10.16</p>
 <table border="1" width="40%" align="right" cellspacing="4" cellpadding="0">
  <tr>
    <td width="100%" bgcolor="#C0C0C0"><span class="cb" id
      style="DISPLAY: block">
      <h4 align="center">Using Internet Explorer</h4>
      <p>The UCD-Main.xml file can be read in Internet Explorer (5.0 and above).
      However:</p>
      <ul>
        <li>It may take a few minutes to load completely.</li>
        <li>The XML parser in IE does not appear to be conformant: it seems to
          break on</span> the following valid code points (and others):
        <ul>
          <li>&lt;IEbugs<br>
            c1='&amp;#xFFF9;'<br>
            c2='&amp;#xFFFA;'<br>
            c3='&amp;#xFFFB;'<br>
            c4='&amp;#xFFFC;'<br>
            c5='&amp;#xFFFD;'<br>
            c6='&amp;#xF0000;'<br>
            c7='&amp;#xFFFFD;'<br>
            c8='&amp;#x100000;'<br>
            c9='&amp;#x10FFFD;'/&gt;</li>
        </ul>
      </li>
      </ul>
    </td>
  </tr>
 </table>
 <p><a href="UCD-Main.xml">UCD-Main.xml</a> provides an XML format for the main
 files in the Unicode Character Database. These include:</p>
 <ul>
  <li><code>UnicodeData.txt</code></li>
  <li><code>ArabicShaping.txt</code></li>
  <li><code>Jamo.txt</code></li>
  <li><code>SpecialCasing.txt</code></li>
  <li><code>CompositionExclusions.txt</code></li>
  <li><code>EastAsianWidth.txt</code></li>
  <li><code>LineBreak.txt</code></li>
  <li><code>BidiMirroring.txt</code></li>
  <li><code>CaseFolding.txt</code></li>
  <li><code>Blocks.txt</code></li>
  <li><code>PropList.alpha.txt</code></li>
 </ul>
 <p>Other files in the UCD have very different structure or purpose, and are best
 expressed with separate files. Some annotational data, such as that in
 NamesList.txt or the 10646 comment in UnicodeData, is also best served with
 separate files. The current UCD files not yet in XML format are:</p>
 <ul>
  <li><code>Unihan.txt</code></li>
  <li><code>NamesList.txt</code></li>
  <li><code>Index.txt</code></li>
  <li><code>NormalizationTest.txt</code></li>
 </ul>
 <h3>Format</h3>
 <p>The Unicode blocks are provided as a list of &lt;block .../&gt; elements,
 with attributes providing the start, end, and name.</p>
 <p>Each assigned code point is a &lt;e .../&gt; element, with attributes
 supplying specific properties. The meaning of the attributes is specified below.
 There is one exception: large ranges of code points for characters such as
 Hangul Syllables are abbreviated by indicating the start and end of the range.</p>
 <p>Because of the volume of data, the attribute names are abbreviated. A <a
 href="#AttributeAbbreviations">key</a> explains the abbreviations, and relates
 them to the fields and values of the original UCD semicolon-delimited files.
 With few exceptions, the values in the XML are directly copied from data in the
 original UCD semicolon-delimited files. Those exceptions are described <a
 href="http://www.unicode.org/Public/3.0-Update1/UnicodeCharacterDatabase-3.0.1.html#DataModifications">below</a>.</p>
 <p>Numeric character references (NCRs) are used to encode the Unicode code
 points. Some Unicode code points cannot be transmitted in XML, even as NCRs (see
 <a href="http://www.w3.org/TR/REC-xml#charsets">http://www.w3.org/TR/REC-xml#charsets</a>),
 or would not be visibly distinct (TAB, CR, LF) in the data. Such code points are
 represented by '#xX;', where X is a hex number.</p>
 <h3><a name="AttributeAbbreviations">Attribute Abbreviations</a></h3>
 <p>To reduce the size of the document, the following attribute abbreviations are
 used. If an attribute is missing, that means it gets a default value. The
 defaults are listed in parentheses below. If there is no specific default, then
 a missing attribute should be read as N/A (not applicable). A default with '='
 means the default is the value of another field (recursively!). Thus if
 the titlecase attribute is missing, then the value is the same as the uppercase.
 If that in turn is missing, then the value is the same as the code point itself.</p>
 <p>For a description of the source files, see <a
 href="http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html">UnicodeCharacterDatabase.html</a>.
 That file also has links to the descriptions of the fields within the files.
 Since the PropList values are so long, they will probably also be abbreviated in
 the future.</p>
 <table border="1" width="100%">
  <tr>
    <td width="50%" valign="top"><span class="cb" id style="DISPLAY: block">
      <h4>UnicodeData</h4>
      <p>&nbsp; c: code point<br>
      &nbsp; n: name<br>
      &nbsp; gc: general category (Lo)<br>
      &nbsp; cc: combining class (0)<br>
      &nbsp; bc: bidi category (L)<br>
      &nbsp; dm: decomposition mapping<br>
      &nbsp; dt: decomposition type (canonical)<br>
      &nbsp; nt: numeric type<br>
      &nbsp; nv: numeric value<br>
      &nbsp; bm: bidi mirrored (N)<br>
      &nbsp; uc: uppercase (=c)<br>
      &nbsp; lc: lowercase (=c)<br>
      &nbsp; tc: titlecase (=uc)</p>
      <h4>SpecialCasing:</h4>
      <p>&nbsp; sl: special lower (=lc)<br>
      &nbsp; su: special upper (=uc)<br>
      &nbsp; st: special title (=su)<br>
      &nbsp; sc: special case condition</p>
      <h4>CaseFolding:</h4>
      <p>&nbsp; fc: foldcase (=sl)</span></td>
    <td width="50%" valign="top"><span class="cb" id style="DISPLAY: block">
      <h4>CompositionExclusions:</h4>
      <p>&nbsp; ce: composition exclusion (N)</p>
      <h4>EastAsianWidth:</h4>
      <p>&nbsp; ea: east asian width (N)</p>
      <h4>Jamo:</h4>
      <p>&nbsp; jn: jamo name</p>
      <h4>LineBreak:</h4>
      <p>&nbsp; lb: line break class (AL)</p>
      <h4>ArabicShaping:</h4>
      <p>&nbsp; jt: joining type<br>
      &nbsp; jg: joining group</p>
      <h4>BidiMirroring:</h4>
      <p>&nbsp; bg: bidi mirroring glyph (=c)</p>
      <p><b>PropList:</b></p>
      <p>&nbsp; xs: space-delimited list of properties from the file</p>
      <p><b><i>WARNING: these values are likely to change!</i></b></span></td>
  </tr>
 </table>
 <br>
 <h3><a name="DataModifications">Data Modifications</a></h3>
 </span>
 <p>The XML format is generated from the original semicolon-delimited UCD files.
 In general, all fields and values are direct copies. However, there are some
 changes, detailed below.</p>
 <h4>1. Some redundant or annotational fields are omitted</h4>
 <table border="1" width="100%">
  <tr>
    <td width="50%" valign="top"><b>UnicodeData<br>
      </b>1.0 Name<br>
      10646 comment<br>
      <br>
      <b>CaseFolding<br>
      </b>Type (since it is computable from whether the fold equals the normal
      lowercase)
      <p><b>ArabicShaping<br>
      </b>Name<br>
      <br>
      <b>EastAsianWidth<br>
      </b>Name<br>
      <br>
      <b>LineBreak<br>
      </b>Name</p>
    </td>
    <td width="50%" valign="top"><b>PropList</b><font face="Times New Roman"
      color="#000000">
      <p>The fields are based on the proposed PropList.alpha, which changes the
      fields considerably.</p>
      </font>
      <p><span class="cb" id style="display: block"><b><i>WARNING: other values
      are also likely to change!</i></b></span></p>
    </td>
  </tr>
 </table>
 <h4>2. Some fields are broken into several fields; others may be combined into a
 single field</h4>
 <ul>
  <li><b>dt: </b>decomposition tag
    <ul>
      <li>the 'tag' field extracted from the decomposition mapping. If there is
        no tag, the value is &quot;canonical&quot;. Only has meaning if there is
        a decomposition (<b>dm</b>).</li>
    </ul>
  </li>
  <li><b>nt: </b>numeric type
    <ul>
      <li>an enumeration [decimal, digit, numeric] for the type of number. It
        replaces having duplicate field values for numbers</li>
    </ul>
  </li>
  <li><b>rg: </b>range
    <ul>
      <li>used for ranges of values that share characteristics, instead of
        having to do a substring check.<br>
        &quot;START&quot; corresponds to &quot;&lt;..., First&gt;&quot;<br>
        &quot;END&quot; corresponds to &quot;&lt;..., Last&gt;&quot;</li>
    </ul>
  </li>
  <li><b>nc: </b>name computed
    <ul>
      <li>if &quot;COMPUTED&quot;, indicates that the name must be computed:
        e.g. Hangul Syllables, Ideographs</li>
    </ul>
  </li>
  <li><b>na: </b>name annotation
    <ul>
      <li>used for code points that do not really have associated names, like
        control characters and private use characters. The data in that case is
        either extracted from the &quot;&lt;...&gt;&quot; style name in the old
        format, or gotten from the &quot;1.0 Unicode name&quot;.</li>
    </ul>
  </li>
 </ul>
 </body>
 </html>
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
--- a/tools/unicodetools/com/ibm/text/UCD/UCDProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCDProperty.java
@ -1,180 +0,0 @@
 package com.ibm.text.UCD;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.text.utility.*;
 /**
  * Base class for a single UCD property: its names in several styles, its
  * value names, listing headers and the set of code points that have it.
  * Subclasses supply {@link #hasValue(int)}; properties whose value varies by
  * code point must also override {@link #getValue(int, byte)}.
  */
 public abstract class UCDProperty implements UCD_Types {
    // TODO: turn all of these into privates, and use setters only
    protected UCD ucd;
    protected boolean isStandard = true;
    protected byte type = NOT_DERIVED;
    private byte valueType = BINARY_PROP;
    protected boolean hasUnassigned = false;
    protected boolean isBinary = true;
    protected byte defaultValueStyle = SHORT;
    protected byte defaultPropertyStyle = LONG;
    protected String valueName;
    protected String numberValueName;
    protected String shortValueName;
    protected String header;
    protected String subheader;
    protected String name;
    protected String shortName;
    protected String numberName;
    protected boolean skeletonize = true;

    /** Lazily built set of all code points for which hasValue is true. */
    private UnicodeSet cache = null;

    /**
     * Returns the UCD in use.
     */
    public UCD getUCD() {
        return ucd;
    }

    /**
     * Tells whether the property is part of the standard, or just for testing.
     */
    public boolean isStandard() {
        return isStandard;
    }

    public void setStandard(boolean in) {
        isStandard = in;
    }

    /** Always false in this base class. */
    public boolean skipInDerivedListing() {
        return false;
    }

    /** Always false in this base class. */
    public boolean isDefaultValue() {
        return false;
    }

    /**
     * Returns the kind of property (NOT_DERIVED unless changed via setType).
     */
    public byte getType() {
        return type;
    }

    public void setType(byte in) {
        type = in;
    }

    /**
     * Returns the value type (one of the ..._PROP constants; BINARY_PROP
     * unless changed via setValueType).
     */
    public byte getValueType() {
        return valueType;
    }

    public void setValueType(byte in) {
        valueType = in;
    }

    /**
     * Tells whether the property applies to any unassigned characters.
     */
    public boolean hasUnassigned() {
        return hasUnassigned;
    }

    public void setHasUnassigned(boolean in) {
        hasUnassigned = in;
    }

    /**
     * Returns the header used in DerivedXXX files.
     */
    public String getHeader() {
        return header;
    }

    public void setHeader(String in) {
        header = in;
    }

    /**
     * Returns the sub-header (presumably also for the DerivedXXX files).
     */
    public String getSubHeader() {
        return subheader;
    }

    public void setSubHeader(String in) {
        subheader = in;
    }

    /**
     * Returns "propertyName=value" in the given style (SHORT, NORMAL, LONG).
     */
    public String getFullName(byte style) {
        final String propertyPart = getPropertyName(style);
        return propertyPart + "=" + getValue(style);
    }

    public String getFullName() {
        return getFullName(NORMAL);
    }

    /**
     * Returns the property name in the given style (SHORT, NORMAL, LONG or
     * NUMBER). NORMAL stands for defaultPropertyStyle. The long name is passed
     * through Utility.getUnskeleton when skeletonize is set.
     *
     * @throws IllegalArgumentException for any other style
     */
    public String getPropertyName(byte style) {
        final byte resolved = (style == NORMAL) ? defaultPropertyStyle : style;
        if (resolved == LONG) {
            final String longName = name.toString();
            return skeletonize ? Utility.getUnskeleton(longName, false) : longName;
        }
        if (resolved == SHORT) {
            return shortName.toString();
        }
        if (resolved == NUMBER) {
            return numberName.toString();
        }
        throw new IllegalArgumentException("Bad property: " + resolved);
    }

    public String getPropertyName() {
        return getPropertyName(NORMAL);
    }

    /**
     * Stores the property name for the given style. NORMAL stands for
     * defaultPropertyStyle; the long name is stored after
     * Utility.getUnskeleton.
     *
     * @throws IllegalArgumentException for a style other than LONG, SHORT, NUMBER
     */
    public void setPropertyName(byte style, String in) {
        final byte resolved = (style == NORMAL) ? defaultPropertyStyle : style;
        if (resolved == LONG) {
            name = Utility.getUnskeleton(in, false);
        } else if (resolved == SHORT) {
            shortName = in;
        } else if (resolved == NUMBER) {
            numberName = in;
        } else {
            throw new IllegalArgumentException("Bad property: " + resolved);
        }
    }

    /**
     * Returns the value name for a code point in the given style:
     * "" if hasValue is false, otherwise the fixed value name.
     * MUST be overridden if the value varies by code point.
     */
    public String getValue(int cp, byte style) {
        return hasValue(cp) ? getValue(style) : "";
    }

    public String getValue(int cp) {
        return getValue(cp, NORMAL);
    }

    /**
     * Stores the fixed value name for the given style. NORMAL stands for
     * defaultValueStyle; the long name is stored after Utility.getUnskeleton.
     *
     * @throws IllegalArgumentException when the value type is below
     *         BINARY_PROP, or for a style other than LONG, SHORT, NUMBER
     */
    public void setValue(byte style, String in) {
        if (getValueType() < BINARY_PROP) {
            throw new IllegalArgumentException("Can't set varying value: " + style);
        }
        final byte resolved = (style == NORMAL) ? defaultValueStyle : style;
        if (resolved == LONG) {
            valueName = Utility.getUnskeleton(in, false);
        } else if (resolved == SHORT) {
            shortValueName = in;
        } else if (resolved == NUMBER) {
            numberValueName = in;
        } else {
            throw new IllegalArgumentException("Bad value: " + resolved);
        }
    }

    /**
     * Returns the fixed value name in the given style. NORMAL stands for
     * defaultValueStyle.
     *
     * @throws IllegalArgumentException when the value type is below BINARY_PROP
     * @throws com.ibm.text.utility.ChainException wrapping any RuntimeException
     *         raised while fetching the name (unset name or bad style)
     */
    public String getValue(byte style) {
        if (getValueType() < BINARY_PROP) {
            throw new IllegalArgumentException(
                "Value varies in " + getName(LONG) + "; call getValue(cp)");
        }
        try {
            final byte resolved = (style == NORMAL) ? defaultValueStyle : style;
            if (resolved == LONG) {
                return Utility.getUnskeleton(valueName.toString(), false);
            }
            if (resolved == SHORT) {
                return shortValueName.toString();
            }
            if (resolved == NUMBER) {
                return numberValueName.toString();
            }
            throw new IllegalArgumentException("Bad property: " + resolved);
        } catch (RuntimeException e) {
            throw new com.ibm.text.utility.ChainException("Unset value string in " + getName(LONG), null, e);
        }
    }

    /**
     * Special hack for NFD/NFKD: the long value for a non-binary value type,
     * otherwise the long property name.
     */
    public String getListingValue(int cp) {
        if (getValueType() == BINARY_PROP) {
            return getPropertyName(LONG);
        }
        return getValue(cp, LONG);
    }

    /**
     * Tells whether the code point has the property value.
     */
    public abstract boolean hasValue(int cp);

    /**
     * Returns a copy of the set of code points for which hasValue is true.
     * The set is computed over U+0000..U+10FFFF on the first call and kept.
     */
    public UnicodeSet getSet() {
        if (cache == null) {
            cache = new UnicodeSet();
            int cp = 0;
            while (cp <= 0x10FFFF) {
                if (hasValue(cp)) {
                    cache.add(cp);
                }
                ++cp;
            }
        }
        return (UnicodeSet) cache.clone();
    }

    ///////////////////////////////////////////
    // Old names, kept for compatibility

    boolean isTest() {
        return isStandard();
    }

    String getName(byte style) {
        return getPropertyName(style);
    }

    String getName() {
        return getPropertyName();
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@ -1,575 +0,0 @@
 /**
 *******************************************************************************
 * Copyright (C) 1996-2001, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
 * $Date: 2006/04/05 22:12:44 $
 * $Revision: 1.33 $
 *
 *******************************************************************************
 */
 package com.ibm.text.UCD;
 public interface UCD_Types {
    static final byte BINARY_FORMAT = 17; // bumped if binary format of UCD changes. Forces rebuild   
    public static final String BASE_DIR = "C:\\DATA\\";
    public static final String UCD_DIR = BASE_DIR + "UCD\\";
    public static final String BIN_DIR = BASE_DIR + "BIN\\";
    public static final String GEN_DIR = BASE_DIR + "GEN\\";
    public static final char DOTTED_CIRCLE = '\u25CC';
    public static final int 
    	CJK_BASE = 0x4E00,
    	CJK_LIMIT = 0x9FFF+1,
    	CJK_COMPAT_USED_BASE = 0xFA0E,
    	CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
    	CJK_A_BASE = 0x3400,
    	CJK_A_LIMIT = 0x4DBF+1,
    	CJK_B_BASE = 0x20000,
    	CJK_B_LIMIT = 0x2A6DF+1;
    // Unicode Property Types
    static final byte 
        NOT_DERIVED = 1, 
        DERIVED_CORE = 2, 
        DERIVED_NORMALIZATION = 4, 
        DERIVED_ALL = 0x6, 
        ALL = (byte)-1;
     static final byte
        NUMERIC_PROP = 0,
        STRING_PROP = 1,
        MISC_PROP = 2,
        CATALOG_PROP = 3,
        ENUMERATED_PROP = 4,
        BINARY_PROP = 5,
        FLATTENED_BINARY_PROP = 6,
        UNKNOWN_PROP = 7;
    /*
  0	Code value in 4-digit hexadecimal format.
  1	Unicode 2.1 Character Name. These names match exactly the
  2	General Category. This is a useful breakdown into various "character
  3	Canonical Combining Classes. The classes used for the
  4	Bidirectional Category. See the list below for an explanation of the
  5	Character Decomposition. In the Unicode Standard, not all of
  6	Decimal digit value. This is a numeric field. If the character
  7	Digit value. This is a numeric field. If the character represents a
  8	Numeric value. This is a numeric field. If the character has the
  9	If the characters has been identified as a "mirrored" character in
 10	Unicode 1.0 Name. This is the old name as published in Unicode 1.0.
 11	10646 Comment field. This field is informative.
 12	Upper case equivalent mapping. If a character is part of an
 13	Lower case equivalent mapping. Similar to 12. This field is informative.
 14	Title case equivalent mapping. Similar to 12. This field is informative.
    */
    // for IDs
    static final byte NUMBER = -2, SHORT = -1, NORMAL = 0, LONG = 1, BOTH = 2, EXTRA_ALIAS = 3;
    // Binary ENUM Grouping
    public static final int
        CATEGORY = 0,
        COMBINING_CLASS = 0x100,
        BIDI_CLASS = 0x200,
        DECOMPOSITION_TYPE = 0x300,
        NUMERIC_TYPE = 0x400,
        EAST_ASIAN_WIDTH = 0x500,
        LINE_BREAK = 0x600,
        JOINING_TYPE = 0x700,
        JOINING_GROUP = 0x800,
        BINARY_PROPERTIES = 0x900,
        SCRIPT = 0xA00,
        AGE = 0xB00,
        HANGUL_SYLLABLE_TYPE = 0xC00,
        DERIVED = 0xD00,
        LIMIT_ENUM = DERIVED + 0x100,
        NEXT_ENUM = 0x100;
    public static final int LIMIT_COMBINING_CLASS = 256;
    // getCategory
    public static final byte
 	UNASSIGNED		= 0,
 	UPPERCASE_LETTER	= 1,
 	LOWERCASE_LETTER	= 2,
 	TITLECASE_LETTER	= 3,
 	MODIFIER_LETTER		= 4,
 	OTHER_LETTER		= 5,
 	NON_SPACING_MARK	= 6,
 	ENCLOSING_MARK		= 7,
 	COMBINING_SPACING_MARK	= 8,
 	DECIMAL_DIGIT_NUMBER	= 9,
 	LETTER_NUMBER		= 10,
 	OTHER_NUMBER		= 11,
 	SPACE_SEPARATOR		= 12,
 	LINE_SEPARATOR		= 13,
 	PARAGRAPH_SEPARATOR	= 14,
 	CONTROL			= 15,
 	FORMAT			= 16,
 	UNUSED_CATEGORY			= 17,
 	PRIVATE_USE		= 18,
 	SURROGATE		= 19,
 	DASH_PUNCTUATION	= 20,
 	START_PUNCTUATION	= 21,
 	END_PUNCTUATION		= 22,
 	CONNECTOR_PUNCTUATION	= 23,
 	OTHER_PUNCTUATION	= 24,
 	MATH_SYMBOL		= 25,
 	CURRENCY_SYMBOL		= 26,
 	MODIFIER_SYMBOL		= 27,
 	OTHER_SYMBOL		= 28,
 	INITIAL_PUNCTUATION	= 29,
 	FINAL_PUNCTUATION		= 30,
 	LIMIT_CATEGORY = FINAL_PUNCTUATION+1,
 	// Unicode abbreviations
 	Lu = UPPERCASE_LETTER,
 	Ll = LOWERCASE_LETTER,
 	Lt = TITLECASE_LETTER,
    Lm = MODIFIER_LETTER,
 	Lo = OTHER_LETTER,
 	Mn = NON_SPACING_MARK,
 	Me = ENCLOSING_MARK,
 	Mc = COMBINING_SPACING_MARK,
 	Nd = DECIMAL_DIGIT_NUMBER,
 	Nl = LETTER_NUMBER,
 	No = OTHER_NUMBER,
 	Zs = SPACE_SEPARATOR,
 	Zl = LINE_SEPARATOR,
 	Zp = PARAGRAPH_SEPARATOR,
 	Cc = CONTROL,
 	Cf = FORMAT,
 	Cs = SURROGATE,
 	Co = PRIVATE_USE,
 	Cn = UNASSIGNED,
 	Pc = CONNECTOR_PUNCTUATION,
 	Pd = DASH_PUNCTUATION,
 	Ps = START_PUNCTUATION,
 	Pe = END_PUNCTUATION,
 	Po = OTHER_PUNCTUATION,
 	Pi = INITIAL_PUNCTUATION,
 	Pf = FINAL_PUNCTUATION,
 	Sm = MATH_SYMBOL,
 	Sc = CURRENCY_SYMBOL,
 	Sk = MODIFIER_SYMBOL,
 	So = OTHER_SYMBOL;
    static final int
        LETTER_MASK = (1<<Lu) | (1<<Ll) | (1<<Lt) | (1<<Lm) | (1 << Lo),
        CASED_LETTER_MASK = (1<<Lu) | (1<<Ll) | (1<<Lt),
        MARK_MASK = (1<<Mn) | (1<<Me) | (1<<Mc),
        NUMBER_MASK = (1<<Nd) | (1<<Nl) | (1<<No),
        SEPARATOR_MASK = (1<<Zs) | (1<<Zl) | (1<<Zp),
        CONTROL_MASK = (1<<Cc) | (1<<Cf) | (1<<Cs) | (1<<Co),
        PUNCTUATION_MASK = (1<<Pc) | (1<<Pd) | (1<<Ps) | (1<<Pe) | (1<<Po) | (1<<Pi) | (1<<Pf),
        SYMBOL_MASK = (1<<Sm) | (1<<Sc) | (1<<Sk) | (1<<So),
        UNASSIGNED_MASK = (1<<Cn),
        BASE_MASK = LETTER_MASK | NUMBER_MASK | PUNCTUATION_MASK | SYMBOL_MASK | (1<<Mc),
        NONSPACING_MARK_MASK = (1<<Mn) | (1<<Me);
 	// Binary Properties: consecutive codes starting at 0.
 	// LIMIT_BINARY_PROPERTIES is one past the largest code.
 	public static final byte BidiMirrored = 0;
 	public static final byte CompositionExclusion = 1;
 	public static final byte White_space = 2;
 	public static final byte Non_break = 3;
 	public static final byte Bidi_Control = 4;
 	public static final byte Join_Control = 5;
 	public static final byte Dash = 6;
 	public static final byte Hyphen = 7;
 	public static final byte Quotation_Mark = 8;
 	public static final byte Terminal_Punctuation = 9;
 	public static final byte Math_Property = 10;
 	public static final byte Hex_Digit = 11;
 	public static final byte ASCII_Hex_Digit = 12;
 	public static final byte Other_Alphabetic = 13;
 	public static final byte Ideographic = 14;
 	public static final byte Diacritic = 15;
 	public static final byte Extender = 16;
 	public static final byte Other_Lowercase = 17;
 	public static final byte Other_Uppercase = 18;
 	public static final byte Noncharacter_Code_Point = 19;
 	public static final byte CaseFoldTurkishI = 20;
 	public static final byte Other_GraphemeExtend = 21;
 	public static final byte GraphemeLink = 22;
 	public static final byte IDS_BinaryOperator = 23;
 	public static final byte IDS_TrinaryOperator = 24;
 	public static final byte Radical = 25;
 	public static final byte UnifiedIdeograph = 26;
 	public static final byte Other_Default_Ignorable_Code_Point = 27;
 	public static final byte Deprecated = 28;
 	public static final byte Soft_Dotted = 29;
 	public static final byte Logical_Order_Exception = 30;
 	public static final byte Other_ID_Start = 31;
 	public static final byte Sentence_Terminal = 32;
 	public static final byte Variation_Selector = 33;
 	public static final byte Other_ID_Continue = 34;
 	public static final byte Pattern_White_Space = 35;
 	public static final byte Pattern_Syntax = 36;
 	public static final byte LIMIT_BINARY_PROPERTIES = 37;
 	/*
    static final int
 	    BidiMirroredMask = 1<<BidiMirrored,
 	    CompositionExclusionMask = 1<<CompositionExclusion,
 	    AlphabeticMask = 1<<Other_Alphabetic,
 	    Bidi_ControlMask = 1<<Bidi_Control,
        DashMask = 1<<Dash,
        DiacriticMask = 1<<Diacritic,
        ExtenderMask = 1<<Extender,
        Hex_DigitMask = 1<<Hex_Digit,
        HyphenMask = 1<<Hyphen,
        IdeographicMask = 1<<Ideographic,
        Join_ControlMask = 1<<Join_Control,
        Math_PropertyMask = 1<<Math_Property,
        Non_breakMask = 1<<Non_break,
        Noncharacter_Code_PointMask = 1<<Noncharacter_Code_Point,
        Other_LowercaseMask = 1<<Other_Lowercase,
        Other_UppercaseMask = 1<<Other_Uppercase,
        Quotation_MarkMask = 1<<Quotation_Mark,
        Terminal_PunctuationMask = 1<<Terminal_Punctuation,
        White_spaceMask = 1<<White_space;
    */
    // line break: consecutive codes starting at 0.
    // LIMIT_LINE_BREAK is one past the largest code; LB_LIMIT is an alias for it.
    public static final byte LB_XX = 0;
    public static final byte LB_OP = 1;
    public static final byte LB_CL = 2;
    public static final byte LB_QU = 3;
    public static final byte LB_GL = 4;
    public static final byte LB_NS = 5;
    public static final byte LB_EX = 6;
    public static final byte LB_SY = 7;
    public static final byte LB_IS = 8;
    public static final byte LB_PR = 9;
    public static final byte LB_PO = 10;
    public static final byte LB_NU = 11;
    public static final byte LB_AL = 12;
    public static final byte LB_ID = 13;
    public static final byte LB_IN = 14;
    public static final byte LB_HY = 15;
    public static final byte LB_CM = 16;
    public static final byte LB_BB = 17;
    public static final byte LB_BA = 18;
    public static final byte LB_SP = 19;
    public static final byte LB_BK = 20;
    public static final byte LB_CR = 21;
    public static final byte LB_LF = 22;
    public static final byte LB_CB = 23;
    public static final byte LB_SA = 24;
    public static final byte LB_AI = 25;
    public static final byte LB_B2 = 26;
    public static final byte LB_SG = 27;
    public static final byte LB_ZW = 28;
    public static final byte LB_NL = 29;
    public static final byte LB_WJ = 30;
    // Earlier numbering, kept for reference: LB_JL = 29, LB_JV = 30, LB_JT = 31.
    public static final byte LB_JL = 31;
    public static final byte LB_JV = 32;
    public static final byte LB_JT = 33;
    public static final byte LB_H2 = 34;
    public static final byte LB_H3 = 35;
    public static final byte LIMIT_LINE_BREAK = 36;
    public static final byte LB_LIMIT = LIMIT_LINE_BREAK;
    // east asian width: consecutive codes starting at 0.
    // LIMIT_EAST_ASIAN_WIDTH is one past the largest code.
    public static final byte EAN = 0;
    public static final byte EAA = 1;
    public static final byte EAH = 2;
    public static final byte EAW = 3;
    public static final byte EAF = 4;
    public static final byte EANa = 5;
    public static final byte LIMIT_EAST_ASIAN_WIDTH = 6;
 	// bidi class codes. LIMIT_BIDI_CLASS is one past the largest code.
 	static final byte BIDI_L = 0;     // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
 	static final byte BIDI_R = 1;     // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
 	static final byte BIDI_EN = 2;    // European Number
 	static final byte BIDI_ES = 3;    // European Number Separator
 	static final byte BIDI_ET = 4;    // European Number Terminator
 	static final byte BIDI_AN = 5;    // Arabic Number
 	static final byte BIDI_CS = 6;    // Common Number Separator
 	static final byte BIDI_B = 7;     // Block Separator
 	static final byte BIDI_S = 8;     // Segment Separator
 	static final byte BIDI_WS = 9;    // Whitespace
 	static final byte BIDI_ON = 10;   // Other Neutrals ; All other characters: punctuation, symbols
 	// LIMIT_BIDI_2 and BIDI_UNUSED both have the value 11.
 	static final byte LIMIT_BIDI_2 = 11;
 	static final byte BIDI_UNUSED = 11;
 	static final byte BIDI_BN = 12;
 	static final byte BIDI_NSM = 13;
 	static final byte BIDI_AL = 14;
 	static final byte BIDI_LRO = 15;
 	static final byte BIDI_RLO = 16;
 	static final byte BIDI_LRE = 17;
 	static final byte BIDI_RLE = 18;
 	static final byte BIDI_PDF = 19;
 	static final byte LIMIT_BIDI_CLASS = 20;
 	// decompositionType codes. LIMIT_DECOMPOSITION_TYPE is one past the largest code.
 	static final byte NONE = 0;
 	static final byte CANONICAL = 1;
 	// COMPATIBILITY and COMPAT_UNSPECIFIED both have the value 2.
 	static final byte COMPATIBILITY = 2;
 	static final byte COMPAT_UNSPECIFIED = 2;   // Otherwise unspecified compatibility character.
 	static final byte COMPAT_FONT = 3;          // A font variant (e.g. a blackletter form).
 	static final byte COMPAT_NOBREAK = 4;       // A no-break version of a space or hyphen.
 	static final byte COMPAT_INITIAL = 5;       // An initial presentation form (Arabic).
 	static final byte COMPAT_MEDIAL = 6;        // A medial presentation form (Arabic).
 	static final byte COMPAT_FINAL = 7;         // A final presentation form (Arabic).
 	static final byte COMPAT_ISOLATED = 8;      // An isolated presentation form (Arabic).
 	static final byte COMPAT_CIRCLE = 9;        // An encircled form.
 	static final byte COMPAT_SUPER = 10;        // A superscript form.
 	static final byte COMPAT_SUB = 11;          // A subscript form.
 	static final byte COMPAT_VERTICAL = 12;     // A vertical layout presentation form.
 	static final byte COMPAT_WIDE = 13;         // A wide (or zenkaku) compatibility character.
 	static final byte COMPAT_NARROW = 14;       // A narrow (or hankaku) compatibility character.
 	static final byte COMPAT_SMALL = 15;        // A small variant form (CNS compatibility).
 	static final byte COMPAT_SQUARE = 16;       // A CJK squared font variant.
 	static final byte COMPAT_FRACTION = 17;     // A vulgar fraction form.
 	static final byte LIMIT_DECOMPOSITION_TYPE = 18;
    // mirrored type codes; LIMIT_MIRRORED is one past the largest code.
    static final byte NO = 0;
    static final byte YES = 1;
    static final byte LIMIT_MIRRORED = 2;
    // for QuickCheck: three-valued result codes (no / maybe / yes).
    static final byte QNO = 0;
    static final byte QMAYBE = 1;
    static final byte QYES = 2;
    // case type codes. UNCASED and FOLD both have the value 3;
    // LIMIT_CASE is one past the largest code.
    static final byte LOWER = 0;
    static final byte TITLE = 1;
    static final byte UPPER = 2;
    static final byte UNCASED = 3;
    static final byte FOLD = 3;
    static final byte LIMIT_CASE = 4;
    // NOTE(review): presumably selects simple vs. full case mappings; FULL (8) looks
    // like a flag bit that can be combined with a case type -- confirm against callers.
    static final byte SIMPLE = 0;
    static final byte FULL = 8;
    // normalization type codes; FORM_LIMIT is one past the largest code.
    static final byte UNNORMALIZED = 0;
    static final byte C = 1;
    static final byte KC = 2;
    static final byte D = 3;
    static final byte KD = 4;
    static final byte FORM_LIMIT = 5;
    // numericType codes; LIMIT_NUMERIC_TYPE is one past the largest enabled code.
    static final byte NUMERIC_NONE = 0;
    static final byte NUMERIC = 1;
    static final byte DIGIT = 2;
    static final byte DECIMAL = 3;
    // Currently disabled values: HAN_PRIMARY = 4, HAN_ACCOUNTING = 5, HAN_OTHER = 6.
    // WARNING, reset to 7 if all properties desired!!
    static final byte LIMIT_NUMERIC_TYPE = 4;
    // Hangul syllable type codes (named after the limit constant below);
    // HANGUL_SYLLABLE_TYPE_LIMIT is one past the largest code.
    static final byte NA = 0;
    static final byte L = 1;
    static final byte V = 2;
    static final byte T = 3;
    static final byte LV = 4;
    static final byte LVT = 5;
    static final byte HANGUL_SYLLABLE_TYPE_LIMIT = 6;
    // SCRIPT CODE: consecutive codes starting at 0.
    // LIMIT_SCRIPT is one past the largest code.
    public static final byte COMMON_SCRIPT = 0;
    public static final byte LATIN_SCRIPT = 1;
    public static final byte GREEK_SCRIPT = 2;
    public static final byte CYRILLIC_SCRIPT = 3;
    public static final byte ARMENIAN_SCRIPT = 4;
    public static final byte HEBREW_SCRIPT = 5;
    public static final byte ARABIC_SCRIPT = 6;
    public static final byte SYRIAC_SCRIPT = 7;
    public static final byte THAANA_SCRIPT = 8;
    public static final byte DEVANAGARI_SCRIPT = 9;
    public static final byte BENGALI_SCRIPT = 10;
    public static final byte GURMUKHI_SCRIPT = 11;
    public static final byte GUJARATI_SCRIPT = 12;
    public static final byte ORIYA_SCRIPT = 13;
    public static final byte TAMIL_SCRIPT = 14;
    public static final byte TELUGU_SCRIPT = 15;
    public static final byte KANNADA_SCRIPT = 16;
    public static final byte MALAYALAM_SCRIPT = 17;
    public static final byte SINHALA_SCRIPT = 18;
    public static final byte THAI_SCRIPT = 19;
    public static final byte LAO_SCRIPT = 20;
    public static final byte TIBETAN_SCRIPT = 21;
    public static final byte MYANMAR_SCRIPT = 22;
    public static final byte GEORGIAN_SCRIPT = 23;
    public static final byte UNUSED_SCRIPT = 24;
    public static final byte HANGUL_SCRIPT = 25;
    public static final byte ETHIOPIC_SCRIPT = 26;
    public static final byte CHEROKEE_SCRIPT = 27;
    public static final byte ABORIGINAL_SCRIPT = 28;
    public static final byte OGHAM_SCRIPT = 29;
    public static final byte RUNIC_SCRIPT = 30;
    public static final byte KHMER_SCRIPT = 31;
    public static final byte MONGOLIAN_SCRIPT = 32;
    public static final byte HIRAGANA_SCRIPT = 33;
    public static final byte KATAKANA_SCRIPT = 34;
    public static final byte BOPOMOFO_SCRIPT = 35;
    public static final byte HAN_SCRIPT = 36;
    public static final byte YI_SCRIPT = 37;
    public static final byte OLD_ITALIC_SCRIPT = 38;
    public static final byte GOTHIC_SCRIPT = 39;
    public static final byte DESERET_SCRIPT = 40;
    public static final byte INHERITED_SCRIPT = 41;
    public static final byte TAGALOG_SCRIPT = 42;
    public static final byte HANUNOO_SCRIPT = 43;
    public static final byte BUHID_SCRIPT = 44;
    public static final byte TAGBANWA_SCRIPT = 45;
    // From here on the names no longer carry the _SCRIPT suffix.
    public static final byte LIMBU = 46;
    public static final byte TAI_LE = 47;
    public static final byte LINEAR_B = 48;
    public static final byte UGARITIC = 49;
    public static final byte SHAVIAN = 50;
    public static final byte OSMANYA = 51;
    public static final byte CYPRIOT = 52;
    public static final byte BRAILLE = 53;
    public static final byte KATAKANA_OR_HIRAGANA = 54;
    public static final byte BUGINESE = 55;
    public static final byte COPTIC = 56;
    public static final byte NEW_TAI_LUE = 57;
    public static final byte GLAGOLITIC = 58;
    public static final byte TIFINAGH = 59;
    public static final byte SYLOTI_NAGRI = 60;
    public static final byte OLD_PERSIAN = 61;
    public static final byte KHAROSHTHI = 62;
    public static final byte Balinese = 63;
    public static final byte Cuneiform = 64;
    public static final byte Phoenician = 65;
    public static final byte Phags_Pa = 66;
    public static final byte NKo = 67;
    public static final byte Unknown_Script = 68;
    public static final byte LIMIT_SCRIPT = 69;
  // Age codes: consecutive values starting at 0 (UNKNOWN).
  // LIMIT_AGE is one past the largest code.
  static final int UNKNOWN = 0;
  static final int AGE11 = 1;
  static final int AGE20 = 2;
  static final int AGE21 = 3;
  static final int AGE30 = 4;
  static final int AGE31 = 5;
  static final int AGE32 = 6;
  static final int AGE40 = 7;
  static final int AGE41 = 8;
  static final int AGE50 = 9;
  static final int LIMIT_AGE = 10;
    // Version strings, one per age code; the array has ten entries, "?" first.
    static final String[] AGE_VERSIONS = {
        "?", "1.1.0", "2.0.0", "2.1.2", "3.0.0",
        "3.1.0", "3.2.0", "4.0.0", "4.1.0", "5.0.0"
    };
 /**
  * Joining type codes: consecutive values starting at 0.
  * LIMIT_JOINING_TYPE is one past the largest code.
  *
  * Declared final like the other constant groups in this file, so the values
  * cannot be reassigned and can be used as compile-time constants (e.g. case labels).
  */
 public static final byte
    JT_C = 0,
    JT_D = 1,
    JT_R = 2,
    JT_U = 3,
    JT_L = 4,
    JT_T = 5,
    LIMIT_JOINING_TYPE = 6;
 /**
  * Joining group codes: consecutive values starting at 0 (NO_SHAPING).
  * LIMIT_JOINING_GROUP is one past the largest code.
  *
  * Declared final like the other constant groups in this file, so the values
  * cannot be reassigned and can be used as compile-time constants (e.g. case labels).
  */
 public static final byte
    NO_SHAPING = 0,
    AIN = 1,
    ALAPH = 2,
    ALEF = 3,
    BEH = 4,
    BETH = 5,
    DAL = 6,
    DALATH_RISH = 7,
    E = 8,
    FEH = 9,
    FINAL_SEMKATH = 10,
    GAF = 11,
    GAMAL = 12,
    HAH = 13,
    HAMZA_ON_HEH_GOAL = 14,
    HE = 15,
    HEH = 16,
    HEH_GOAL = 17,
    HETH = 18,
    KAF = 19,
    KAPH = 20,
    KNOTTED_HEH = 21,
    LAM = 22,
    LAMADH = 23,
    MEEM = 24,
    MIM = 25,
    NOON = 26,
    NUN = 27,
    PE = 28,
    QAF = 29,
    QAPH = 30,
    REH = 31,
    REVERSED_PE = 32,
    SAD = 33,
    SADHE = 34,
    SEEN = 35,
    SEMKATH = 36,
    SHIN = 37,
    SWASH_KAF = 38,
    TAH = 39,
    TAW = 40,
    TEH_MARBUTA = 41,
    TETH = 42,
    WAW = 43,
    SYRIAC_WAW = 44,
    YEH = 45,
    YEH_BARREE = 46,
    YEH_WITH_TAIL = 47,
    YUDH = 48,
    YUDH_HE = 49,
    ZAIN = 50,
    ZHAIN = 51,
    KHAPH = 52,
    FE = 53,
    LIMIT_JOINING_GROUP = 54;
    // Normalization form codes. By value, bit 0 is set for the composed forms
    // (NFC, NFKC) and bit 1 for the compatibility forms (NFKD, NFKC).
    static final byte NFD = 0;
    static final byte NFC = 1;
    static final byte NFKD = 2;
    static final byte NFKC = 3;
    // Single-bit masks with the values 2 and 1.
    // NOTE(review): presumably applied to the NFD/NFC/NFKD/NFKC codes -- confirm against callers.
    public static final int NF_COMPATIBILITY_MASK = 2;
    public static final int NF_COMPOSITION_MASK = 1;
    // DERIVED PROPERTY: consecutive codes starting at 0.
    // DERIVED_PROPERTY_LIMIT is one past the largest code.
    static final byte PropMath = 0;
    static final byte PropAlphabetic = 1;
    static final byte PropLowercase = 2;
    static final byte PropUppercase = 3;
    static final byte ID_Start = 4;
    static final byte ID_Continue_NO_Cf = 5;
    static final byte Mod_ID_Start = 6;
    static final byte Mod_ID_Continue_NO_Cf = 7;
    static final byte Missing_Uppercase = 8;
    static final byte Missing_Lowercase = 9;
    static final byte Missing_Mixedcase = 10;
    static final byte FC_NFKC_Closure = 11;
    static final byte FullCompExclusion = 12;
    static final byte FullCompInclusion = 13;
    static final byte QuickNFD = 14;
    static final byte QuickNFC = 15;
    static final byte QuickNFKD = 16;
    static final byte QuickNFKC = 17;
    static final byte ExpandsOnNFD = 18;
    static final byte ExpandsOnNFC = 19;
    static final byte ExpandsOnNFKD = 20;
    static final byte ExpandsOnNFKC = 21;
    static final byte GenNFD = 22;
    static final byte GenNFC = 23;
    static final byte GenNFKD = 24;
    static final byte GenNFKC = 25;
    static final byte DefaultIgnorable = 26;
    static final byte GraphemeExtend = 27;
    static final byte GraphemeBase = 28;
    static final byte FC_NFC_Closure = 29;
    static final byte Other_Case_Ignorable = 30;
    static final byte Case_Ignorable = 31;
    static final byte Type_i = 32;
    static final byte NFC_Leading = 33;
    static final byte NFC_TrailingNonZero = 34;
    static final byte NFC_TrailingZero = 35;
    static final byte NFC_Resulting = 36;
    static final byte NFD_UnsafeStart = 37;
    static final byte NFC_UnsafeStart = 38;
    static final byte NFKD_UnsafeStart = 39;
    static final byte NFKC_UnsafeStart = 40;
    static final byte NFD_Skippable = 41;
    static final byte NFC_Skippable = 42;
    static final byte NFKD_Skippable = 43;
    static final byte NFKC_Skippable = 44;
    static final byte Case_Sensitive = 45;
    static final byte DERIVED_PROPERTY_LIMIT = 46;
 }
--- a/Show More
+++ b/Show More