ICU-0 fixes for generation of Unicode 4.1.0 properties

X-SVN-Rev: 16858
2004-11-13 23:10:32 +00:00 · 2004-11-13 23:10:32 +00:00 · bd1094eaca
commit bd1094eaca
parent 4c8340b33f
16 changed files with 1415 additions and 543 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/util/Equator.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/Equator.java
@ -0,0 +1,27 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2002-2004, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.util;
+
+/**
+ * Strategy for deciding whether two objects are equal, together with a
+ * hash function that must agree with that notion of equality.
+ */
+public interface Equator {
+    /**
+      * Equality function. Implementations must handle the case of null,
+      * and compare any two objects that could be compared.
+      * Must obey the normal rules of symmetry: a=b => b=a
+      * and transitivity: (a=b & b=c) => a=c
+      * @param a first object to compare, may be null
+      * @param b second object to compare, may be null
+      * @return true if a and b are equal
+      */
+     public boolean isEqual(Object a, Object b);
+
+    /**
+     * Hash function matching {@link #isEqual(Object, Object)}.
+     * Must obey normal rules: a=b => getHashCode(a)=getHashCode(b)
+     * @param object the object to hash
+     * @return a hash code for the object
+     */
+    public int getHashCode(Object object);
+}
--- a/icu4j/src/com/ibm/icu/dev/test/util/ListSet.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/ListSet.java
@ -0,0 +1,247 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 2002-2004, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ */
+package com.ibm.icu.dev.test.util;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.ListIterator;
+import java.util.Set;
+import java.util.List;
+import java.util.TreeSet;
+
+/**
+ * A list with unique items. Elements are kept in insertion order in a backing list, while a
+ * TreeSet built on the supplied comparator decides whether an element is already present.
+ * It does not permit an item to be added twice, and does not support (at least for now)
+ * adding, replacing or removing elements at a position; those methods throw
+ * UnsupportedOperationException. (Support may be added later). Also should add support
+ * for Equator.
+ * <p>
+ * Note: the iterators returned by this class are those of the backing list, so removing
+ * an element through them is not reflected in the uniqueness set.
+ * @author davis
+ */
+public class ListSet implements Set, List {
+	/** Elements in insertion order. */
+	List list = new ArrayList();
+	/** The same elements, used for uniqueness and membership tests. */
+	Set set;
+	/** Ordering that defines when two elements count as the same item. */
+	Comparator comparator;
+	
+	/**
+	 * Creates an empty ListSet.
+	 * @param comparator defines element identity: two elements comparing as 0 are duplicates
+	 */
+	ListSet(Comparator comparator) {
+		this.comparator = comparator;
+		set = new TreeSet(comparator);
+	}
+	/**
+	 * Not supported.
+	 * @param index
+	 * @param element
+	 * @throws UnsupportedOperationException always
+	 */
+	public void add(int index, Object element) {
+		throw new UnsupportedOperationException();
+	}
+	/**
+	 * Appends the element unless an equivalent one is already present.
+	 * @param o element to add
+	 * @return true if the element was added
+	 */
+	public boolean add(Object o) {
+		boolean result = set.add(o);
+		if (result) list.add(o);
+		return result;
+	}
+	/**
+	 * Not supported.
+	 * @param index
+	 * @param c
+	 * @return never returns normally
+	 * @throws UnsupportedOperationException always
+	 */
+	public boolean addAll(int index, Collection c) {
+		throw new UnsupportedOperationException();
+	}
+	/**
+	 * Appends every element of c that is not already present, in c's iteration order.
+	 * @param c elements to add
+	 * @return true if at least one element was added
+	 */
+	public boolean addAll(Collection c) {
+		// TODO optimize
+		boolean result = false;
+		for (Iterator it = c.iterator(); it.hasNext();) {
+			// Must not short-circuit: it.next() has to run on every pass, otherwise the
+			// iterator never advances once the first element has been added.
+			if (add(it.next())) result = true;
+		}
+		return result;
+	}
+	/**
+	 * Removes all elements.
+	 */
+	public void clear() {
+		list.clear();
+		// keep the membership set in step with the list
+		set.clear();
+	}
+	/**
+	 * @param o element to look for
+	 * @return true if an equivalent element is present
+	 */
+	public boolean contains(Object o) {
+		return set.contains(o);
+	}
+	/**
+	 * @param c elements to look for
+	 * @return true if every element of c is present
+	 */
+	public boolean containsAll(Collection c) {
+		return set.containsAll(c);
+	}
+	/* (non-Javadoc)
+	 * @see java.lang.Object#equals(java.lang.Object)
+	 */
+	public boolean equals(Object obj) {
+		if (obj == this) return true;
+		// also rejects null, which the cast alone would let through
+		if (!(obj instanceof ListSet)) return false;
+		ListSet other = (ListSet) obj;
+		return list.equals(other.list) && set.equals(other.set);
+	}
+	/**
+	 * @param index position in insertion order
+	 * @return the element at that position
+	 */
+	public Object get(int index) {
+		return list.get(index);
+	}
+	/* (non-Javadoc)
+	 * @see java.lang.Object#hashCode()
+	 */
+	public int hashCode() {
+		return list.hashCode();
+	}
+	/**
+	 * @param o element to look for
+	 * @return index of the first element the comparator treats as equal to o, or -1
+	 */
+	public int indexOf(Object o) {
+		for (int i = 0; i < list.size(); ++i) {
+			if (0 == comparator.compare(list.get(i), o)) return i;
+		}
+		return -1;
+	}
+	/**
+	 * @return true if there are no elements
+	 */
+	public boolean isEmpty() {
+		return list.isEmpty();
+	}
+	/**
+	 * @return an iterator over the elements in insertion order
+	 */
+	public Iterator iterator() {
+		return list.iterator();
+	}
+	/**
+	 * @param o element to look for
+	 * @return index of the last element the comparator treats as equal to o, or -1
+	 */
+	public int lastIndexOf(Object o) {
+		for (int i = list.size()-1; i >= 0 ; --i) {
+			if (0 == comparator.compare(list.get(i), o)) return i;
+		}
+		return -1;
+	}
+	/**
+	 * @return a list iterator over the backing list
+	 */
+	public ListIterator listIterator() {
+		return list.listIterator();
+	}
+	/**
+	 * @param index starting position
+	 * @return a list iterator over the backing list, starting at index
+	 */
+	public ListIterator listIterator(int index) {
+		return list.listIterator(index);
+	}
+	/**
+	 * Not supported.
+	 * @param index
+	 * @return never returns normally
+	 * @throws UnsupportedOperationException always
+	 */
+	public Object remove(int index) {
+		throw new UnsupportedOperationException();
+	}
+	/**
+	 * Removes the element equivalent to o, if present.
+	 * @param o element to remove
+	 * @return true if an element was removed
+	 */
+	public boolean remove(Object o) {
+		boolean result = set.remove(o);
+		if (!result) return false;
+		return matchListToSet();
+	}
+	/**
+	 * Removes every element that is contained in c.
+	 * @param c elements to remove
+	 * @return true if anything was removed
+	 */
+	public boolean removeAll(Collection c) {
+		boolean result = set.removeAll(c);
+		if (!result) return false;
+		return matchListToSet();
+
+	}
+	/**
+	 * Keeps only the elements that are contained in c.
+	 * @param c elements to keep
+	 * @return true if anything was removed
+	 */
+	public boolean retainAll(Collection c) {
+		boolean result = set.retainAll(c);
+		if (!result) return false;
+		return matchListToSet();
+	}
+	/**
+	 * Drops from the list every element that is no longer in the set.
+	 * @return always true, so callers can return the result as "changed"
+	 */
+	private boolean matchListToSet() {
+		for (Iterator it = list.iterator(); it.hasNext();) {
+			Object o = it.next();
+			if (!set.contains(o)) it.remove();
+		}
+		return true;
+	}
+	/**
+	 * Not supported.
+	 * @param index
+	 * @param element
+	 * @return never returns normally
+	 * @throws UnsupportedOperationException always
+	 */
+	public Object set(int index, Object element) {
+		throw new UnsupportedOperationException();
+	}
+	/**
+	 * @return the number of elements
+	 */
+	public int size() {
+		return list.size();
+	}
+	/**
+	 * Copies a range of elements into a new, independent ListSet using the same comparator.
+	 * @param fromIndex first index, inclusive
+	 * @param toIndex last index, exclusive
+	 * @return a new ListSet holding the elements in that range, in order
+	 */
+	public List subList(int fromIndex, int toIndex) {
+		ListSet result = new ListSet(comparator);
+		// add the elements one by one; add(Object) on the sub-list itself would insert
+		// the List as a single element
+		for (Iterator it = list.subList(fromIndex, toIndex).iterator(); it.hasNext();) {
+			result.add(it.next());
+		}
+		return result;
+	}
+	/**
+	 * @return the elements in insertion order
+	 */
+	public Object[] toArray() {
+		return list.toArray();
+	}
+	/**
+	 * @param a array to fill if it is large enough
+	 * @return the elements in insertion order
+	 */
+	public Object[] toArray(Object[] a) {
+		return list.toArray(a);
+	}
+	/* (non-Javadoc)
+	 * @see java.lang.Object#toString()
+	 */
+	public String toString() {
+		return list.toString();
+	}
+}
--- a/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/TestUtilities.java
@ -39,7 +39,7 @@ public class TestUtilities extends TestFmwk {
    UnicodeMap map1 = new UnicodeMap();
    Map map2 = new HashMap();
    Map map3 = new TreeMap();
-    UnicodeMap.Equator equator = UnicodeMap.SIMPLE_EQUATOR;
+    Comparator equator = UnicodeMap.SIMPLE_EQUATOR;
    SortedSet log = new TreeSet();
    static String[] TEST_VALUES = {null, "A", "B", "C", "D", "E", "F"};
    static Random random = new Random(12345);
@ -126,7 +126,7 @@ public class TestUtilities extends TestFmwk {
        map3 = new TreeMap();
        Object lastValue = new Object();
        while (mi.next()) {
-            if (!UnicodeMap.SIMPLE_EQUATOR.isEqual(lastValue, mi.value)) {
+            if (UnicodeMap.SIMPLE_EQUATOR.compare(lastValue, mi.value) != 0) {
                // System.out.println("Change: " + Utility.hex(mi.codepoint) + " => " + mi.value);
                lastValue = mi.value;
            }
@ -140,7 +140,7 @@ public class TestUtilities extends TestFmwk {
        for (int i = 0; i < LIMIT; ++i) {
            Object value1 = map1.getValue(i);
            Object value2 = map2.get(new Integer(i));
-            if (!equator.isEqual(value1, value2)) {
+            if (equator.compare(value1, value2) != 0) {
                errln(counter + " Difference at " + Utility.hex(i)
                     + "\t UnicodeMap: " + value1
                     + "\t HashMap: " + value2);
--- a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeMap.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeMap.java
@ -10,6 +10,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;

@ -30,11 +31,16 @@ public final class UnicodeMap implements Cloneable {
    private int length = 2;
    private int[] transitions = {0,0x110000,0,0,0,0,0,0,0,0};
    private Object[] values = new Object[10];
+    private boolean errorOnReset = false;
+    
+    private ListSet availableValues;
+    boolean staleAvailableValues = false;

    private int lastIndex = 0;
    
-    public UnicodeMap(Equator equator) {
+    public UnicodeMap(Comparator equator) {
        this.equator = equator;
+        availableValues = new ListSet(equator);
    }
    
    public UnicodeMap() {
@ -49,7 +55,7 @@ public final class UnicodeMap implements Cloneable {
            if (length != that.length || !equator.equals(that.equator)) return false;
            for (int i = 0; i < length-1; ++i) {
                if (transitions[i] != that.transitions[i]) return false;
-                if (!equator.isEqual(values[i], that.values[i])) return false;
+                if (!areEqual(values[i], that.values[i])) return false;
            }
            return true;
        } catch (ClassCastException e) {
@ -57,12 +63,22 @@ public final class UnicodeMap implements Cloneable {
        }
    }
    
+    /**
+     * Hash code used for the values stored in this map.
+     * @param o a stored value; may be null, since unset ranges hold null
+     * @return 0 for null, otherwise o.hashCode()
+     */
+    public int getHashCode(Object o) {
+        // hashCode() feeds values[] entries through here, and those start out as null
+        return o == null ? 0 : o.hashCode();
+    }
+    
+    /**
+     * Tests two values for equality as defined by this map's comparator.
+     * @param a first value
+     * @param b second value
+     * @return true if the comparator reports the two values as equal (compare result 0)
+     */
+    public boolean areEqual(Object a, Object b) {
+        final int order = equator.compare(a, b);
+        return 0 == order;
+    }
+    
    public int hashCode() {
        int result = length;
        // TODO might want to abbreviate this for speed.
        for (int i = 0; i < length-1; ++i) {
            result = 37*result + transitions[i];
-            result = 37*result + equator.getHashCode(values[i]);
+            result = 37*result + getHashCode(values[i]);
        }
        return result;
    }
@ -75,6 +91,8 @@ public final class UnicodeMap implements Cloneable {
        that.length = length;
        that.transitions = (int[]) transitions.clone();
        that.values = (Object[]) values.clone();
+        that.equator = equator;
+        that.availableValues = new ListSet(equator);
        return that;
    }
    
@ -87,7 +105,7 @@ public final class UnicodeMap implements Cloneable {
              throw new IllegalArgumentException("Invariant failed: Lengths bad");
          }
        for (int i = 1; i < length-1; ++i) {
-            if (equator.isEqual(values[i-1], values[i])) {
+            if (areEqual(values[i-1], values[i])) {
                throw new IllegalArgumentException("Invariant failed: values shared at " 
                    + "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">"
                    + "\t" + Utility.hex(i) + ": <" + values[i] + ">"
@ -107,39 +125,20 @@ public final class UnicodeMap implements Cloneable {
        }
    }
    
-    public interface Equator {
-        /**
-          * Comparator function. If overridden, must handle case of null,
-          * and compare any two objects that could be compared.
-          * Must obey normal rules of symmetry: a=b => b=a
-          * and transitivity: a=b & b=c => a=b)
-          * @param a
-          * @param b
-          * @return true if a and b are equal
-          */
-         public boolean isEqual(Object a, Object b);
-
-        /**
-         * Must obey normal rules: a=b => getHashCode(a)=getHashCode(b)
-         * @param object
-         * @return a hash code for the object
-         */
-        public int getHashCode(Object object);
-    }
-    
-    private static final class SimpleEquator implements Equator {
-        public boolean isEqual(Object a, Object b) {
-            if (a == b) return true;
-            if (a == null || b == null) return false;
-            return a.equals(b);
+    private static final class SimpleEquator implements Comparator {
+        public int compare(Object a, Object b) {
+            if (a == b) return 0;
+            if (a == null) return -1;
+            if (b == null) return 1;
+            return ((Comparable)a).compareTo((Comparable)b);
        }
        public int getHashCode(Object a) {
            if (a == null) return 0;
            return a.hashCode();
        }
    }
-    public static Equator SIMPLE_EQUATOR = new SimpleEquator(); 
-    private Equator equator = SIMPLE_EQUATOR;
+    public static Comparator SIMPLE_EQUATOR = new SimpleEquator(); 
+    private Comparator equator = SIMPLE_EQUATOR;
 
    /**
     * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1]
@ -261,7 +260,16 @@ public final class UnicodeMap implements Cloneable {
        }
        int limitIndex = baseIndex + 1;
        // cases are (a) value is already set
-        if (equator.isEqual(values[baseIndex], value)) return this;
+        if (areEqual(values[baseIndex], value)) return this;        
+        if (errorOnReset && values[baseIndex] != null) {
+        	throw new IllegalArgumentException("Attempt to reset value for " + Utility.hex(codepoint)
+        			+ " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value);
+        }
+
+        // adjust the available values
+        staleAvailableValues = true;
+        availableValues.add(value); // add if not there already      
+
        int baseCP = transitions[baseIndex];
        int limitCP = transitions[limitIndex];
        // we now start walking through the difference case,
@ -271,12 +279,12 @@ public final class UnicodeMap implements Cloneable {
        if (baseCP == codepoint) {
            // CASE: At very start of range
            boolean connectsWithPrevious = 
-                baseIndex != 0 && equator.isEqual(value, values[baseIndex-1]);               
+                baseIndex != 0 && areEqual(value, values[baseIndex-1]);               
                
            if (limitCP == codepoint + 1) {
                // CASE: Single codepoint range
                boolean connectsWithFollowing =
-                    baseIndex < length - 1 && equator.isEqual(value, values[limitIndex]);
+                    baseIndex < length - 1 && areEqual(value, values[limitIndex]);
                
                if (connectsWithPrevious) {
                    // A1a connects with previous & following, so remove index
@ -308,7 +316,7 @@ public final class UnicodeMap implements Cloneable {
            // CASE: at end of range        
            // if connects, just back up range
            boolean connectsWithFollowing =
-                baseIndex < length - 1 && equator.isEqual(value, values[limitIndex]);
+                baseIndex < length - 1 && areEqual(value, values[limitIndex]);

            if (connectsWithFollowing) {
                --transitions[limitIndex]; 
@ -396,6 +404,8 @@ public final class UnicodeMap implements Cloneable {
     * @return this (for chaining)
     */
    public UnicodeMap setMissing(Object value) {
+    	staleAvailableValues = true;
+    	availableValues.add(value);
        for (int i = 0; i < length; ++i) {
            if (values[i] == null) values[i] = value;
        }
@ -412,7 +422,7 @@ public final class UnicodeMap implements Cloneable {
    public UnicodeSet getSet(Object value, UnicodeSet result) {
        if (result == null) result = new UnicodeSet();
        for (int i = 0; i < length - 1; ++i) {
-            if (equator.isEqual(value, values[i])) {
+            if (areEqual(value, values[i])) {
                result.add(transitions[i], transitions[i+1]-1);
            } 
        }
@ -429,13 +439,18 @@ public final class UnicodeMap implements Cloneable {
     * @return result
     */
    public Collection getAvailableValues(Collection result) {
-        if (result == null) result = new ArrayList(1);
-        for (int i = 0; i < length - 1; ++i) {
-            Object value = values[i];
-            if (value == null) continue;
-            if (result.contains(value)) continue;
-            result.add(value);
-        }
+    	if (staleAvailableValues) {
+    		// collect all the current values
+    		// retain them in the availableValues
+    		Set temp = new TreeSet(equator);
+            for (int i = 0; i < length - 1; ++i) {
+                temp.add(values[i]);
+            }
+            availableValues.retainAll(temp);
+            staleAvailableValues = false;
+    	}
+    	if (result == null) result = new ArrayList(1);
+        result.addAll(availableValues);
        return result;
    }
    
@ -539,4 +554,16 @@ public final class UnicodeMap implements Cloneable {
        }
        return result.toString();
    }
+	/**
+	 * Reports whether changing an already-set value is treated as an error.
+	 * When the flag is on, the value-setting code in this class throws an
+	 * IllegalArgumentException on an attempt to change a code point whose
+	 * current value is non-null.
+	 * @return the current errorOnReset flag
+	 */
+	public boolean isErrorOnReset() {
+		return errorOnReset;
+	}
+	/**
+	 * Turns the "error on reset" check on or off. When on, the value-setting
+	 * code in this class throws an IllegalArgumentException on an attempt to
+	 * change a code point whose current value is non-null.
+	 * @param errorOnReset true to reject such changes, false to allow them
+	 */
+	public void setErrorOnReset(boolean errorOnReset) {
+		this.errorOnReset = errorOnReset;
+	}
 }
--- a/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/UnicodeProperty.java
@ -110,6 +110,10 @@ public abstract class UnicodeProperty extends UnicodeLabel {
        return _getValue(codepoint);
    }

+    //public String getValue(int codepoint, boolean isShort) {
+    //	return getValue(codepoint);
+    //}
+    
    public List getNameAliases(List result) {
        if (result == null) result = new ArrayList(1);
        return _getNameAliases(result);
@ -118,6 +122,7 @@ public abstract class UnicodeProperty extends UnicodeLabel {
        if (result == null) result = new ArrayList(1);
        result = _getValueAliases(valueAlias, result);
        if (!result.contains(valueAlias) && type < NUMERIC) {
+        	result = _getValueAliases(valueAlias, result); // for debugging
            throw new IllegalArgumentException(
                "Internal error: " + getName() + " doesn't contain " + valueAlias
                + ": " + new BagFormatter().join(result));
@ -145,7 +150,373 @@ public abstract class UnicodeProperty extends UnicodeLabel {
    public final List getAvailableValues() {
        return getAvailableValues(null);
    }
+    
+    /**
+     * Gets the value for a code point, optionally replaced by its first value alias.
+     * The replacement is only made when getShortest is true, the value is non-null
+     * and the property type is below MISC.
+     * @param codepoint the code point to look up
+     * @param getShortest true to return the first value alias instead of the plain value
+     * @return the value, its first alias, or null if there is no value
+     */
+    public final String getValue(int codepoint, boolean getShortest) {
+        final String plainValue = getValue(codepoint);
+        if (getShortest && plainValue != null && type < MISC) {
+            return getFirstValueAlias(plainValue);
+        }
+        return plainValue;
+    }

+    /**
+     * Returns the first entry of the name-alias list, looking it up once and
+     * caching it for later calls.
+     * @return the first name alias
+     */
+    public final String getFirstNameAlias() {
+        if (firstNameAlias != null) {
+            return firstNameAlias;
+        }
+        firstNameAlias = (String) getNameAliases().get(0);
+        return firstNameAlias;
+    }
+
+    /**
+     * Looks up the first alias recorded for a value, building the lookup table
+     * on first use.
+     * @param value a property value
+     * @return the cached first alias, or null if the value is not in the table
+     */
+    public final String getFirstValueAlias(String value) {
+        if (valueToFirstValueAlias == null) {
+            _getFirstValueAliasCache();
+        }
+        final Object alias = valueToFirstValueAlias.get(value);
+        return (String) alias;
+    }
+
+    /**
+     * Builds the value-to-first-alias table and, in the same pass, records the
+     * length of the longest value and of the longest first alias. Walks every
+     * available value and takes the first entry of its alias list.
+     * @throws IllegalArgumentException if the first alias of a value is null
+     */
+    private void _getFirstValueAliasCache() {
+        // reset the widths from their "not computed" marker (-1) before measuring
+        maxValueWidth = 0;
+        maxFirstValueAliasWidth = 0;
+        valueToFirstValueAlias = new HashMap(1);
+        Iterator it = getAvailableValues().iterator();
+        while (it.hasNext()) {
+            String value = (String)it.next();
+            // NOTE(review): get(0) on an empty alias list would throw before the
+            // null check below is reached - confirm the list is never empty
+            String first = (String) getValueAliases(value).get(0);
+            if (first == null) { // internal error
+                throw new IllegalArgumentException("Value not in value aliases: " + value);
+            }
+            // optional trace for the single property selected by CHECK_NAME
+            if (DEBUG && CHECK_NAME.equals(getName())) {
+                System.out.println("First Alias: " + getName() + ": " + value + " => "
+                 + first + new BagFormatter().join(getValueAliases(value)));
+            }
+            valueToFirstValueAlias.put(value,first);
+            if (value.length() > maxValueWidth) {
+                maxValueWidth = value.length();
+            }
+            if (first.length() > maxFirstValueAliasWidth) {
+                maxFirstValueAliasWidth = first.length();
+            }
+        }
+    }
+
+    private int maxValueWidth = -1;
+    private int maxFirstValueAliasWidth = -1;
+
+    /**
+     * Returns the length of the longest value, or of the longest first value
+     * alias, computing both on first use.
+     * @param getShortest true for the widest first alias, false for the widest value
+     * @return the maximum length in chars
+     */
+    public int getMaxWidth(boolean getShortest) {
+        if (maxValueWidth < 0) {
+            _getFirstValueAliasCache();
+        }
+        return getShortest ? maxFirstValueAliasWidth : maxValueWidth;
+    }
+
+    /**
+     * Convenience overload: same as getSet(propertyValue, null), so the
+     * result is collected in a newly created set.
+     * @param propertyValue the value to match
+     * @return the matching code points
+     */
+    public final UnicodeSet getSet(String propertyValue) {
+        return getSet(propertyValue,null);
+    }
+    /**
+     * Convenience overload: same as getSet(matcher, null), so the result is
+     * collected in a newly created set.
+     * @param matcher decides which values are included
+     * @return the matching code points
+     */
+    public final UnicodeSet getSet(Matcher matcher) {
+        return getSet(matcher,null);
+    }
+
+    public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
+        return getSet(new SimpleMatcher(propertyValue,
+            isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
+          result);
+    }
+
+    private UnicodeMap unicodeMap = null;
+
+    public static final String UNUSED = "??";
+
+    public final UnicodeSet getSet(Matcher matcher, UnicodeSet result) {
+        if (result == null) result = new UnicodeSet();
+        if (isType(STRING_OR_MISC_MASK)) {
+            for (int i = 0; i <= 0x10FFFF; ++i) {
+                String value = getValue(i);
+                if (value != null && matcher.matches(value)) {
+                    result.add(i);
+                }
+            }
+            return result;
+        }
+        List temp = new ArrayList(1); // to avoid reallocating...
+        UnicodeMap um = getUnicodeMap();
+        Iterator it = um.getAvailableValues(null).iterator();
+        main:
+        while (it.hasNext()) {
+            String value = (String)it.next();
+            temp.clear();
+            Iterator it2 = getValueAliases(value,temp).iterator();
+            while (it2.hasNext()) {
+                String value2 = (String)it2.next();
+                //System.out.println("Values:" + value2);
+                if (matcher.matches(value2)
+                  || matcher.matches(toSkeleton(value2))) {
+                    um.getSet(value, result);
+                    continue main;
+                }
+            }
+        }
+        return result;
+    }
+
+    /*
+    public UnicodeSet getMatchSet(UnicodeSet result) {
+        if (result == null) result = new UnicodeSet();
+        addAll(matchIterator, result);
+        return result;
+    }
+
+    public void setMatchSet(UnicodeSet set) {
+        matchIterator = new UnicodeSetIterator(set);
+    }
+    */
+
+    /**
+     * Debugging utility: captures the current call stack as text by printing
+     * the stack trace of a freshly created exception into a string.
+     * @return the stack trace text, prefixed with "Showing Stack with fake "
+     */
+    public static String getStack() {
+        Exception marker = new Exception();
+        StringWriter trace = new StringWriter();
+        PrintWriter out = new PrintWriter(trace);
+        marker.printStackTrace(out);
+        out.flush();
+        String text = trace.toString();
+        return "Showing Stack with fake " + text;
+    }
+
+    // TODO use this instead of plain strings
+    /**
+     * A name held in two forms: a display form ("pretty") and a skeleton used
+     * for comparison, equality and hashing.
+     */
+    public static class Name implements Comparable {
+        private static Map skeletonCache;
+        private String skeleton;
+        private String pretty;
+        /**
+         * Styles accepted by the constructor. RAW keeps the name unchanged; TITLE and
+         * NORMAL regularize it, TITLE additionally with title-casing. They are static
+         * so that callers can name a style before they have an instance.
+         */
+        public static final int RAW = 0, TITLE = 1, NORMAL = 2;
+        /**
+         * @param name the source name; null is treated as the empty string
+         * @param style one of RAW, TITLE, NORMAL
+         */
+        public Name(String name, int style) {
+            if (name == null) name = "";
+            if (style == RAW) {
+                skeleton = pretty = name;
+            } else {
+                pretty = regularize(name, style == TITLE);
+                skeleton = toSkeleton(pretty);
+            }
+        }
+        /**
+         * Orders names by skeleton.
+         * @throws ClassCastException if o is not a Name
+         */
+        public int compareTo(Object o) {
+            return skeleton.compareTo(((Name)o).skeleton);
+        }
+        /**
+         * @return true if o is a Name with the same skeleton; false for null
+         *         or any other type
+         */
+        public boolean equals(Object o) {
+            if (o == this) return true;
+            // equals must answer false, not throw, for null and foreign types
+            if (!(o instanceof Name)) return false;
+            return skeleton.equals(((Name)o).skeleton);
+        }
+        public int hashCode() {
+            return skeleton.hashCode();
+        }
+        /**
+         * @return the display form of the name
+         */
+        public String toString() {
+            return pretty;
+        }
+    }
+
+    /**
+     * Returns the map of code points to values for this property, building it
+     * on the first call and reusing it afterwards.
+     * @return the unicode map
+     */
+    protected UnicodeMap getUnicodeMap() {
+        if (unicodeMap != null) {
+            return unicodeMap;
+        }
+        unicodeMap = _getUnicodeMap();
+        return unicodeMap;
+    }
+
+    /**
+     * Builds a fresh map by asking getValue for every code point from 0 to
+     * 0x10FFFF. When DEBUG is on and this is the property named by CHECK_NAME,
+     * the name, class, version, call stack and resulting map are printed.
+     * @return the newly built map
+     */
+    protected UnicodeMap _getUnicodeMap() {
+        UnicodeMap map = new UnicodeMap();
+        int cp = 0;
+        while (cp <= 0x10FFFF) {
+            map.put(cp, getValue(cp));
+            ++cp;
+        }
+        boolean trace = DEBUG && CHECK_NAME.equals(getName());
+        if (trace) {
+            System.out.println(getName() + ":\t" + getClass().getName()
+                 + "\t" + getVersion());
+            System.out.println(getStack());
+            System.out.println(map);
+        }
+        return map;
+    }
+    
+    /**
+     * Adds obj to result unless it is null or already contained.
+     * Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
+     * @param obj the object to add; null is ignored
+     * @param result the collection to add to
+     * @return result, for chaining
+     */
+    public static Collection addUnique(Object obj, Collection result) {
+        if (obj == null || result.contains(obj)) {
+            return result;
+        }
+        result.add(obj);
+        return result;
+    }
+
+    /**
+     * Utility for managing property & non-string value aliases
+     */
+    public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
+        public int compare(Object o1, Object o2) {
+            return compareNames((String)o1, (String)o2);
+        }
+    };
+
+    /**
+     * Utility for managing property & non-string value aliases
+     *
+     */
+    // TODO optimize
+    public static boolean equalNames(String a, String b) {
+        if (a == b) return true;
+        if (a == null) return false;
+         return toSkeleton(a).equals(toSkeleton(b));
+    }
+
+    /**
+     * Utility for managing property & non-string value aliases
+     */
+    // TODO optimize
+    public static int compareNames(String a, String b) {
+        if (a == b) return 0;
+        if (a == null) return -1;
+        if (b == null) return 1;
+        return toSkeleton(a).compareTo(toSkeleton(b));
+    }
+
+    /**
+     * Utility for managing property & non-string value aliases: produces the
+     * comparison form of a name. Every '_', ' ' and '-' except one in the very
+     * first position is dropped, and all remaining chars are lowercased.
+     * @param source the name, may be null
+     * @return null for null; source itself if nothing had to change;
+     *         otherwise the new skeleton string
+     */
+    // TODO account for special names, tibetan, hangul
+    public static String toSkeleton(String source) {
+        if (source == null) {
+            return null;
+        }
+        final int length = source.length();
+        StringBuffer skeleton = new StringBuffer();
+        boolean changed = false;
+        // working on chars is fine, since no surrogates are involved
+        for (int pos = 0; pos < length; ++pos) {
+            final char original = source.charAt(pos);
+            final boolean separator = original == '_' || original == ' ' || original == '-';
+            if (separator && pos > 0) {
+                changed = true;
+                continue;
+            }
+            final char lowered = Character.toLowerCase(original);
+            if (lowered != original) {
+                changed = true;
+            }
+            skeleton.append(lowered);
+        }
+        // hand back the original instance when possible, to avoid string creation
+        return changed ? skeleton.toString() : source;
+    }
+
+    /**
+     * Gets the skeleton of a name: digits, 'A'-'Z', '<' and '>' are copied,
+     * spaces are dropped, and a '-' is copied only in the special positions
+     * listed below (each such case is also reported on System.out).
+     * @param source the name, may be null
+     * @return the skeleton, or null if source is null
+     * @throws IllegalArgumentException if source contains any other char
+     *         (lowercase letters included)
+     */
+    public static String toNameSkeleton(String source) {
+        if (source == null) return null;
+        StringBuffer result = new StringBuffer();
+        // remove spaces, medial '-'
+        // we can do this with char, since no surrogates are involved
+        for (int i = 0; i < source.length(); ++i) {
+            char ch = source.charAt(i);
+            if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ch == '<' || ch == '>') {
+                result.append(ch);
+            } else if (ch == ' ') {
+                // don't copy ever
+            } else if (ch == '-') {
+                // only copy non-medials AND trailing O-E
+                // The order of the tests matters: the first two short-circuit for the
+                // first and last position, which keeps charAt(i-1) and charAt(i+1)
+                // below within bounds.
+                if (0 == i
+                    || i == source.length() - 1
+                    || source.charAt(i-1) == ' '
+                    || source.charAt(i+1) == ' '
+                    || (i == source.length() - 2
+                        && source.charAt(i-1) == 'O'
+                        && source.charAt(i+1) == 'E')) {
+                    // diagnostic output for every hyphen that is kept
+                    System.out.println("****** EXCEPTION " + source);
+                    result.append(ch);
+                }
+                // otherwise don't copy
+            } else {
+                throw new IllegalArgumentException("Illegal Name Char: U+" + Utility.hex(ch) + ", " + ch);
+            }
+        }
+        return result.toString();
+    }
+
+    /**
+     * These routines use the Java functions, because they only need to act on ASCII
+     * Changes space, - into _, inserts _ between lower and UPPER.
+     * When titlecaseStart is true, the first cased letter of the string and the
+     * first cased letter after each separator or '=' is uppercased as well.
+     * @param source the text to regularize, may be null
+     * @param titlecaseStart true to uppercase the first cased letter of each part
+     * @return the regularized text, or null if source is null
+     */
+    public static String regularize(String source, boolean titlecaseStart) {
+        if (source == null) return source;
+        /*if (source.equals("noBreak")) { // HACK
+            if (titlecaseStart) return "NoBreak";
+            return source;
+        }
+        */
+        StringBuffer result = new StringBuffer();
+        int lastCat = -1;
+        // Despite its name, this is true while the first cased letter of the
+        // current part has NOT been seen yet.
+        boolean haveFirstCased = true;
+        for (int i = 0; i < source.length(); ++i) {
+            char c = source.charAt(i);
+            if (c == ' ' || c == '-' || c == '_') {
+                c = '_';
+                haveFirstCased = true;
+            }
+            if (c == '=') haveFirstCased = true;
+            // the category is taken after the separator mapping but before any
+            // uppercasing below, so lastCat reflects the case as written in source
+            int cat = Character.getType(c);
+            if (lastCat == Character.LOWERCASE_LETTER && cat == Character.UPPERCASE_LETTER) {
+                result.append('_');
+            }
+            if (haveFirstCased && (cat == Character.LOWERCASE_LETTER
+                    || cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
+                if (titlecaseStart) {
+                    c = Character.toUpperCase(c);
+                }
+                haveFirstCased = false;
+            }
+            result.append(c);
+            lastCat = cat;
+        }
+        return result.toString();
+    }
+
+    /**
+     * Utility function for comparing codepoint to string without
+     * generating new string.
+     * @param codepoint
+     * @param other
+     * @return true if the codepoint equals the string
+     */
+    public static final boolean equals(int codepoint, String other) {
+        if (other.length() == 1) {
+            return codepoint == other.charAt(0);
+        }
+        if (other.length() == 2) {
+            return other.equals(UTF16.valueOf(codepoint));
+        }
+        return false;
+    }
+
+    /**
+     * Utility that should be on UnicodeSet: adds everything the iterator
+     * yields (strings and code point ranges) to result.
+     * @param source iterator supplying the items; it is advanced to its end
+     * @param result the set to add to
+     */
+    static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
+        while (source.nextRange()) {
+            final boolean isString = source.codepoint == UnicodeSetIterator.IS_STRING;
+            if (!isString) {
+                result.add(source.codepoint, source.codepointEnd);
+                continue;
+            }
+            result.add(source.string);
+        }
+    }
+
+    /**
+     * Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
+     */
+    public static Collection addAllUnique(Collection source, Collection result) {
+        for (Iterator it = source.iterator(); it.hasNext();) {
+            addUnique(it.next(), result);
+        }
+        return result;
+    }
+
+    /**
+     * Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
+     */
+    public static Collection addAllUnique(Object[] source, Collection result) {
+        for (int i = 0; i < source.length; ++i) {
+            addUnique(source[i], result);
+        }
+        return result;
+    }
+    
    static public class Factory {
        static boolean DEBUG = false;

@ -502,22 +873,32 @@ public abstract class UnicodeProperty extends UnicodeLabel {
            return matcher.matches();
        }
    }
+    
+    public static abstract class BaseProperty extends UnicodeProperty {
+        protected List propertyAliases = new ArrayList(1);
+        String version;
+        /**
+         * Sets the name, type, aliases and version of this property in one call.
+         * The short alias is appended to the alias list before the long one.
+         * @param alias long name; passed to setName and added as an alias
+         * @param shortAlias short name, added as an alias
+         * @param propertyType passed to setType
+         * @param version value later returned by _getVersion
+         * @return this property
+         */
+        public BaseProperty setMain(String alias, String shortAlias, int propertyType,
+                String version) {
+            setName(alias);
+            setType(propertyType);
+            this.propertyAliases.add(shortAlias);
+            this.propertyAliases.add(alias);
+            this.version = version;
+            return this;
+        }
+        /**
+         * @return the version stored in this property's version field
+         */
+        public String _getVersion() {
+            return this.version;
+        }
+        /**
+         * Adds this property's aliases to the supplied list via addAllUnique.
+         * @param result list that receives the aliases
+         * @return result
+         */
+        public List _getNameAliases(List result) {
+            // addAllUnique hands back the collection it was given
+            return (List) addAllUnique(propertyAliases, result);
+        }

-    public static abstract class SimpleProperty extends UnicodeProperty {
-        private List propertyAliases = new ArrayList(1);
+    }
+    
+    public static abstract class SimpleProperty extends BaseProperty {
        List values;
        Map toValueAliases = new HashMap(1);
-        String version;
-
-        public SimpleProperty setMain(String alias, String shortAlias, int propertyType,
-          String version) {
-            setName(alias);
-            setType(propertyType);
-            propertyAliases.add(shortAlias);
-            propertyAliases.add(alias);
-            this.version = version;
-            return this;
-        }

        public SimpleProperty addName(String alias) {
            propertyAliases.add(alias);
@ -546,11 +927,6 @@ public abstract class UnicodeProperty extends UnicodeLabel {
            return this;
        }

-        public List _getNameAliases(List result) {
-            addAllUnique(propertyAliases, result);
-            return result;
-        }
-
        public List _getValueAliases(String valueAlias, List result) {
            if (toValueAliases == null) _fillValues();
            List a = (List) toValueAliases.get(valueAlias);
@ -582,384 +958,27 @@ public abstract class UnicodeProperty extends UnicodeLabel {
            addUnique(alias, aliases);
            addUnique(item, aliases);
        }
-
        public String _getVersion() {
            return version;
        }
    }
-
-    public static class UnicodeMapProperty extends SimpleProperty {
-        private UnicodeMap unicodeMap;
+    
+    public static class UnicodeMapProperty extends BaseProperty {
+        protected UnicodeMap unicodeMap;
        protected String _getValue(int codepoint) {
            return (String) unicodeMap.getValue(codepoint);
        }
+		/**
+		 * A value of a map-backed property has no aliases other than itself.
+		 * @param valueAlias value to look up
+		 * @param result list that receives the value when the map contains it
+		 * @return result
+		 */
+		protected List _getValueAliases(String valueAlias, List result) {
+			boolean known = unicodeMap.getAvailableValues().contains(valueAlias);
+			if (known) {
+				result.add(valueAlias);
+			}
+			return result;
+		}
+		/**
+		 * Delegates to the underlying UnicodeMap.
+		 * @param result collection handed to UnicodeMap.getAvailableValues
+		 * @return whatever the map returns, cast to List
+		 */
+		protected List _getAvailableValues(List result) {
+			Object values = unicodeMap.getAvailableValues(result);
+			return (List) values;
+		}
    }


-    public final String getValue(int codepoint, boolean getShortest) {
-        String result = getValue(codepoint);
-        if (type >= MISC || result == null || !getShortest) return result;
-        return getFirstValueAlias(result);
-    }

-    public final String getFirstNameAlias() {
-        if (firstNameAlias == null) {
-            firstNameAlias = (String) getNameAliases().get(0);
-        }
-        return firstNameAlias;
-    }
-
-    public final String getFirstValueAlias(String value) {
-        if (valueToFirstValueAlias == null) _getFirstValueAliasCache();
-        return (String)valueToFirstValueAlias.get(value);
-    }
-
-    private void _getFirstValueAliasCache() {
-        maxValueWidth = 0;
-        maxFirstValueAliasWidth = 0;
-        valueToFirstValueAlias = new HashMap(1);
-        Iterator it = getAvailableValues().iterator();
-        while (it.hasNext()) {
-            String value = (String)it.next();
-            String first = (String) getValueAliases(value).get(0);
-            if (first == null) { // internal error
-                throw new IllegalArgumentException("Value not in value aliases: " + value);
-            }
-            if (DEBUG && CHECK_NAME.equals(getName())) {
-                System.out.println("First Alias: " + getName() + ": " + value + " => "
-                 + first + new BagFormatter().join(getValueAliases(value)));
-            }
-            valueToFirstValueAlias.put(value,first);
-            if (value.length() > maxValueWidth) {
-                maxValueWidth = value.length();
-            }
-            if (first.length() > maxFirstValueAliasWidth) {
-                maxFirstValueAliasWidth = first.length();
-            }
-        }
-    }
-
-    private int maxValueWidth = -1;
-    private int maxFirstValueAliasWidth = -1;
-
-    public int getMaxWidth(boolean getShortest) {
-        if (maxValueWidth < 0) _getFirstValueAliasCache();
-        if (getShortest) return maxFirstValueAliasWidth;
-        return maxValueWidth;
-    }
-
-    public final UnicodeSet getSet(String propertyValue) {
-        return getSet(propertyValue,null);
-    }
-    public final UnicodeSet getSet(Matcher matcher) {
-        return getSet(matcher,null);
-    }
-
-    public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
-        return getSet(new SimpleMatcher(propertyValue,
-            isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
-          result);
-    }
-
-    private UnicodeMap unicodeMap = null;
-
-    public static final String UNUSED = "??";
-
-    public final UnicodeSet getSet(Matcher matcher, UnicodeSet result) {
-        if (result == null) result = new UnicodeSet();
-        if (isType(STRING_OR_MISC_MASK)) {
-            for (int i = 0; i <= 0x10FFFF; ++i) {
-                String value = getValue(i);
-                if (value != null && matcher.matches(value)) {
-                    result.add(i);
-                }
-            }
-            return result;
-        }
-        List temp = new ArrayList(1); // to avoid reallocating...
-        UnicodeMap um = getUnicodeMap();
-        Iterator it = um.getAvailableValues(null).iterator();
-        main:
-        while (it.hasNext()) {
-            String value = (String)it.next();
-            temp.clear();
-            Iterator it2 = getValueAliases(value,temp).iterator();
-            while (it2.hasNext()) {
-                String value2 = (String)it2.next();
-                //System.out.println("Values:" + value2);
-                if (matcher.matches(value2)
-                  || matcher.matches(toSkeleton(value2))) {
-                    um.getSet(value, result);
-                    continue main;
-                }
-            }
-        }
-        return result;
-    }
-
-    /*
-    public UnicodeSet getMatchSet(UnicodeSet result) {
-        if (result == null) result = new UnicodeSet();
-        addAll(matchIterator, result);
-        return result;
-    }
-
-    public void setMatchSet(UnicodeSet set) {
-        matchIterator = new UnicodeSetIterator(set);
-    }
-    */
-
-    /**
-     * Utility for debugging
-     */
-    public static String getStack() {
-        Exception e = new Exception();
-        StringWriter sw = new StringWriter();
-        PrintWriter pw = new PrintWriter(sw);
-        e.printStackTrace(pw);
-        pw.flush();
-        return "Showing Stack with fake " + sw.getBuffer().toString();
-    }
-
-    // TODO use this instead of plain strings
-    public static class Name implements Comparable {
-        private static Map skeletonCache;
-        private String skeleton;
-        private String pretty;
-        public final int RAW = 0, TITLE = 1, NORMAL = 2;
-        public Name(String name, int style) {
-            if (name == null) name = "";
-            if (style == RAW) {
-                skeleton = pretty = name;
-            } else {
-                pretty = regularize(name, style == TITLE);
-                skeleton = toSkeleton(pretty);
-            }
-        }
-        public int compareTo(Object o) {
-            return skeleton.compareTo(((Name)o).skeleton);
-        }
-        public boolean equals(Object o) {
-            return skeleton.equals(((Name)o).skeleton);
-        }
-        public int hashCode() {
-            return skeleton.hashCode();
-        }
-        public String toString() {
-            return pretty;
-        }
-    }
-    /**
-     * Utility for managing property & non-string value aliases
-     */
-    public static final Comparator PROPERTY_COMPARATOR = new Comparator() {
-        public int compare(Object o1, Object o2) {
-            return compareNames((String)o1, (String)o2);
-        }
-    };
-
-    /**
-     * Utility for managing property & non-string value aliases
-     *
-     */
-    // TODO optimize
-    public static boolean equalNames(String a, String b) {
-        if (a == b) return true;
-        if (a == null) return false;
-         return toSkeleton(a).equals(toSkeleton(b));
-    }
-
-    /**
-     * Utility for managing property & non-string value aliases
-     */
-    // TODO optimize
-    public static int compareNames(String a, String b) {
-        if (a == b) return 0;
-        if (a == null) return -1;
-        if (b == null) return 1;
-        return toSkeleton(a).compareTo(toSkeleton(b));
-    }
-
-    /**
-     * Utility for managing property & non-string value aliases
-     */
-    // TODO account for special names, tibetan, hangul
-    public static String toSkeleton(String source) {
-        if (source == null) return null;
-        StringBuffer skeletonBuffer = new StringBuffer();
-        boolean gotOne = false;
-        // remove spaces, '_', '-'
-        // we can do this with char, since no surrogates are involved
-        for (int i = 0; i < source.length(); ++i) {
-            char ch = source.charAt(i);
-            if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) {
-                gotOne = true;
-            } else {
-                char ch2 = Character.toLowerCase(ch);
-                if (ch2 != ch) {
-                    gotOne = true;
-                    skeletonBuffer.append(ch2);
-                } else {
-                    skeletonBuffer.append(ch);
-                }
-            }
-        }
-        if (!gotOne) return source; // avoid string creation
-        return skeletonBuffer.toString();
-    }
-
-    // get the name skeleton
-    public static String toNameSkeleton(String source) {
-        if (source == null) return null;
-        StringBuffer result = new StringBuffer();
-        // remove spaces, medial '-'
-        // we can do this with char, since no surrogates are involved
-        for (int i = 0; i < source.length(); ++i) {
-            char ch = source.charAt(i);
-            if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ch == '<' || ch == '>') {
-                result.append(ch);
-            } else if (ch == ' ') {
-                // don't copy ever
-            } else if (ch == '-') {
-                // only copy non-medials AND trailing O-E
-                if (0 == i
-                    || i == source.length() - 1
-                    || source.charAt(i-1) == ' '
-                    || source.charAt(i+1) == ' '
-                    || (i == source.length() - 2
-                        && source.charAt(i-1) == 'O'
-                        && source.charAt(i+1) == 'E')) {
-                    System.out.println("****** EXCEPTION " + source);
-                    result.append(ch);
-                }
-                // otherwise don't copy
-            } else {
-                throw new IllegalArgumentException("Illegal Name Char: U+" + Utility.hex(ch) + ", " + ch);
-            }
-        }
-        return result.toString();
-    }
-
-    /**
-     * These routines use the Java functions, because they only need to act on ASCII
-     * Changes space, - into _, inserts _ between lower and UPPER.
-     */
-    public static String regularize(String source, boolean titlecaseStart) {
-        if (source == null) return source;
-        /*if (source.equals("noBreak")) { // HACK
-            if (titlecaseStart) return "NoBreak";
-            return source;
-        }
-        */
-        StringBuffer result = new StringBuffer();
-        int lastCat = -1;
-        boolean haveFirstCased = true;
-        for (int i = 0; i < source.length(); ++i) {
-            char c = source.charAt(i);
-            if (c == ' ' || c == '-' || c == '_') {
-                c = '_';
-                haveFirstCased = true;
-            }
-            if (c == '=') haveFirstCased = true;
-            int cat = Character.getType(c);
-            if (lastCat == Character.LOWERCASE_LETTER && cat == Character.UPPERCASE_LETTER) {
-                result.append('_');
-            }
-            if (haveFirstCased && (cat == Character.LOWERCASE_LETTER
-                    || cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
-                if (titlecaseStart) {
-                    c = Character.toUpperCase(c);
-                }
-                haveFirstCased = false;
-            }
-            result.append(c);
-            lastCat = cat;
-        }
-        return result.toString();
-    }
-
-    /**
-     * Utility function for comparing codepoint to string without
-     * generating new string.
-     * @param codepoint
-     * @param other
-     * @return true if the codepoint equals the string
-     */
-    public static final boolean equals(int codepoint, String other) {
-        if (other.length() == 1) {
-            return codepoint == other.charAt(0);
-        }
-        if (other.length() == 2) {
-            return other.equals(UTF16.valueOf(codepoint));
-        }
-        return false;
-    }
-
-    /**
-     * Utility that should be on UnicodeSet
-     * @param source
-     * @param result
-     */
-    static public void addAll(UnicodeSetIterator source, UnicodeSet result) {
-        while (source.nextRange()) {
-            if (source.codepoint == UnicodeSetIterator.IS_STRING) {
-                result.add(source.string);
-            } else {
-                result.add(source.codepoint, source.codepointEnd);
-            }
-        }
-    }
-
-    /**
-     * Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
-     */
-    public static Collection addUnique(Object obj, Collection result) {
-        if (obj != null && !result.contains(obj)) result.add(obj);
-        return result;
-    }
-
-    /**
-     * Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
-     */
-    public static Collection addAllUnique(Collection source, Collection result) {
-        for (Iterator it = source.iterator(); it.hasNext();) {
-            addUnique(it.next(), result);
-        }
-        return result;
-    }
-
-    /**
-     * Really ought to create a Collection UniqueList, that forces uniqueness. But for now...
-     */
-    public static Collection addAllUnique(Object[] source, Collection result) {
-        for (int i = 0; i < source.length; ++i) {
-            addUnique(source[i], result);
-        }
-        return result;
-    }
-
-
-    /**
-     * @return the unicode map
-     */
-    protected UnicodeMap getUnicodeMap() {
-        if (unicodeMap == null) unicodeMap = _getUnicodeMap();
-        return unicodeMap;
-    }
-
-    protected UnicodeMap _getUnicodeMap() {
-        UnicodeMap result = new UnicodeMap();
-        for (int i = 0; i <= 0x10FFFF; ++i) {
-            //if (DEBUG && i == 0x41) System.out.println(i + "\t" + getValue(i));
-            result.put(i, getValue(i));
-        }
-        if (DEBUG && CHECK_NAME.equals(getName())) {
-            System.out.println(getName() + ":\t" + getClass().getName()
-                 + "\t" + getVersion());
-            System.out.println(getStack());
-            System.out.println(result);
-        }
-        return result;
-    }
 }

--- a/icu4j/src/com/ibm/icu/dev/tool/cldr/GenerateCldrTests.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/cldr/GenerateCldrTests.java
@ -8,7 +8,9 @@
 */
 package com.ibm.icu.dev.tool.cldr;

+import java.io.BufferedReader;
 import java.io.File;
+import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.io.Writer;
@ -78,7 +80,9 @@ public class GenerateCldrTests {
        LOGDIR = 3,
        SOURCEDIR =4,
        MATCH = 5,
-        FULLY_RESOLVED = 6;
+        FULLY_RESOLVED = 6,
+		LANGUAGES = 7,
+		TZADIR = 8;

    private static final UOption[] options = {
            UOption.HELP_H(),
@ -88,17 +92,38 @@ public class GenerateCldrTests {
            UOption.SOURCEDIR().setDefault("C:\\ICU4C\\locale\\common\\"),
            UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
            UOption.create("fullyresolved", 'f', UOption.NO_ARG),
+            UOption.create("languages", 'g', UOption.NO_ARG),
+            UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
    };

    CldrCollations cldrCollations;
    static String logDir = null, destDir = null;
+    
+    /**
+     * Reports whether the display language of otherLocale, as shown in locale,
+     * differs from the bare language code.
+     * @param locale locale in which the name is displayed
+     * @param otherLocale locale whose language is being named
+     * @return false if the display name is just the language code
+     */
+    public static boolean hasLocalizedLanguageFor(ULocale locale, ULocale otherLocale) {
+        String code = otherLocale.getLanguage();
+        String shown = otherLocale.getDisplayLanguage(locale);
+        boolean sameAsCode = code.equals(shown);
+        return !sameAsCode;
+    }
+  
+    /**
+     * Reports whether the display country of otherLocale, as shown in locale,
+     * differs from the bare country code. A locale without a country passes.
+     * @param locale locale in which the name is displayed
+     * @param otherLocale locale whose country is being named
+     * @return false only if there is a country and its display name is just the code
+     */
+    public static boolean hasLocalizedCountryFor(ULocale locale, ULocale otherLocale) {
+        String code = otherLocale.getCountry();
+        boolean hasCountry = !code.equals("");
+        // the display name is only looked up when there is a country to name
+        return !hasCountry || !code.equals(otherLocale.getDisplayCountry(locale));
+    }

-    public static void main(String[] args) throws Exception {
+	public static void main(String[] args) throws Exception {
        UOption.parseArgs(args, options);
        log = BagFormatter.openUTF8Writer(options[LOGDIR].value, "log.txt");
        try {
+        	if (options[LANGUAGES].doesOccur) {
+        		generateSize(true);
+        		return;
+        	}
+        	//generateSize();
+        	//if (true) return;
+			//compareAvailable();

-            //compareAvailable();
            //if (true) return;
            //System.out.println(createCaseClosure(new UnicodeSet("[a{bc}{def}{oss}]")));
            //System.out.println(createCaseClosure(new UnicodeSet("[a-z\u00c3\u0178{aa}]")));
@ -118,9 +143,271 @@ public class GenerateCldrTests {
    }

    /**
-     *
-     */
-    /*
+     * Writes log.html (in the directory given by the LOGDIR option) summarizing the
+     * locale files under SOURCEDIR/main: the number of locales, languages and countries,
+     * reported separately for non-draft and draft locales, each count followed by the
+     * native display names rendered by showSet. The "root" locale is skipped.
+     * @param transliterate passed through to showSet
+     * @throws IOException propagated from the file helpers called here
+     */
+	private static void generateSize(boolean transliterate) throws IOException {
+		PrintWriter logHtml = BagFormatter.openUTF8Writer(options[LOGDIR].value, "log.html");
+		try {
+			String dir = options[SOURCEDIR].value + "main" + File.separator;
+			DraftChecker dc = new DraftChecker(dir);
+			Set filenames = getMatchingXMLFiles(dir, ".*");
+			Collator col = Collator.getInstance(ULocale.ENGLISH);
+			Set languages = new TreeSet(col), countries = new TreeSet(col),
+				draftLanguages = new TreeSet(col), draftCountries = new TreeSet(col);
+			Map nativeLanguages = new TreeMap(col), nativeCountries = new TreeMap(col),
+				draftNativeLanguages = new TreeMap(col), draftNativeCountries = new TreeMap(col);
+			int localeCount = 0;
+			int draftLocaleCount = 0;
+			for (Iterator it = filenames.iterator(); it.hasNext();) {
+				String localeName = (String) it.next();
+				if (localeName.equals("root")) continue; // skip root
+				if (dc.isDraft(localeName)) {
+					draftLocaleCount++;
+					addCounts(localeName, true, draftLanguages, draftCountries, draftNativeLanguages, draftNativeCountries, col);
+				} else {
+					localeCount++;
+					addCounts(localeName, false, languages, countries, nativeLanguages, nativeCountries, col);
+				}
+			}
+			// A language already present in a non-draft locale is not reported as draft.
+			draftLanguages.removeAll(languages);
+			for (Iterator it = nativeLanguages.keySet().iterator(); it.hasNext();) {
+				draftNativeLanguages.remove(it.next());
+			}
+			// NOTE(review): the draft country collections get no equivalent filtering, so a
+			// country can be listed both as draft and non-draft -- confirm this is intended.
+			logHtml.println("<html><head>");
+			logHtml.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
+			logHtml.println("</head><body>");
+			logHtml.println("<p><b>Locales:</b> " + localeCount);
+			logHtml.println("<p><b>Languages:</b> " + languages.size());
+			logHtml.println(showSet(nativeLanguages, transliterate, true));
+			logHtml.println("<p><b>Countries:</b> " + countries.size());
+			logHtml.println(showSet(nativeCountries, transliterate, false));
+			logHtml.println("<p><b>Draft locales:</b> " + draftLocaleCount);
+			logHtml.println("<p><b>Draft languages:</b> " + draftLanguages.size());
+			logHtml.println(showSet(draftNativeLanguages, transliterate, true));
+			logHtml.println("<p><b>Draft countries:</b> " + draftCountries.size());
+			logHtml.println(showSet(draftNativeCountries, transliterate, false));
+			logHtml.println("</body></html>");
+		} finally {
+			// close the report even when reading a locale file fails part-way through
+			logHtml.close();
+		}
+	}
+	
+	static final UnicodeSet NON_LATIN = new UnicodeSet("[^[:latin:][:common:][:inherited:]]");
+	
+	/**
+	 * Records the language and, when present, the country of one locale in the supplied
+	 * collections, and logs a warning whenever a native display name equals the English one.
+	 * Despite the "draft" prefix on the parameter names, generateSize calls this with either
+	 * its draft or its non-draft collections.
+	 * @param localeName locale identifier; if its third-from-last character is '_', the
+	 *        last two characters are taken as the country and the rest as the language
+	 * @param isDraft only affects the warning text (adds a "D" prefix)
+	 * @param draftLanguages receives the language part of localeName
+	 * @param draftCountries receives the country part, if there is one
+	 * @param draftNativeLanguages maps the title-cased native language name to localeName
+	 * @param draftNativeCountries maps the title-cased native country name to localeName
+	 * @param col not used by this method
+	 */
+	private static void addCounts(String localeName, boolean isDraft, Set draftLanguages, Set draftCountries,
+			Map draftNativeLanguages, Map draftNativeCountries, Comparator col) {
+		ULocale uloc = new ULocale(localeName);
+		String lang = localeName, country = "";
+		// Split off a trailing "_XX"; anything before it (possibly with further '_') stays in lang.
+		if (localeName.length() > 3 && localeName.charAt(localeName.length() - 3) == '_') {
+			lang = localeName.substring(0, localeName.length() - 3);
+			country = localeName.substring(localeName.length() - 2);
+		}
+		
+		String nativeName, englishName;
+		draftLanguages.add(lang);
+		nativeName = uloc.getDisplayLanguage(uloc);
+		englishName = uloc.getDisplayLanguage(ULocale.ENGLISH);
+		// Identical native and English names are reported (except for English itself).
+		if (!lang.equals("en") && nativeName.equals(englishName)) {
+			log.println((isDraft ? "D" : "") +"\tWarning: in " + localeName + ", display name for " + lang + " equals English: "  + nativeName);
+		}
+		// Keyed by display name, so a later locale with the same native name replaces the earlier value.
+		draftNativeLanguages.put(fixedTitleCase(uloc, nativeName), localeName);
+		if (!country.equals("")) {
+			draftCountries.add(country);
+			nativeName = getFixedDisplayCountry(uloc, uloc);
+			englishName = getFixedDisplayCountry(uloc, ULocale.ENGLISH);
+			if (!lang.equals("en") && nativeName.equals(englishName)) {
+				log.println((isDraft ? "D" : "") + "\tWarning: in " + localeName + ", display name for " + country + " equals English: "  + nativeName);
+			}
+			draftNativeCountries.put(fixedTitleCase(uloc, nativeName), localeName);
+		}
+	}
+	
+	/**
+	 * Title-cases a string with UCharacter.toTitleCase, then restores the two
+	 * abbreviations that title-casing spoils ("U.S." and "S.A.R.").
+	 * @param uloc locale passed to UCharacter.toTitleCase
+	 * @param in text to title-case
+	 * @return the corrected title-cased text
+	 */
+	static String fixedTitleCase(ULocale uloc, String in) {
+		String titled = UCharacter.toTitleCase(uloc, in, null);
+		String withUs = replace(titled, "U.s.", "U.S.");
+		return replace(withUs, "S.a.r.", "S.A.R.");
+	}
+	/*
+	static void addMapSet(Map m, Object key, Object value, Comparator com) {
+		Set valueSet = (Set) m.get(key);
+		if (valueSet == null) {
+			valueSet = new TreeSet(com);
+			m.put(key, valueSet);
+		}
+		valueSet.add(value);
+	}
+	*/
+	/**
+	 * Returns the display country of a locale, substituting the replacement from
+	 * fixCountryNames when the name has an entry there.
+	 * @param uloc locale whose country is named
+	 * @param forLanguage locale in which the name is displayed
+	 * @return the replacement name if one is registered, otherwise the name as returned
+	 *         by getDisplayCountry
+	 */
+	private static String getFixedDisplayCountry(ULocale uloc, ULocale forLanguage) {
+		String displayName = uloc.getDisplayCountry(forLanguage);
+		String override = (String) fixCountryNames.get(displayName);
+		return override == null ? displayName : override;
+	}
+	
+	// Replacement display names, keyed by the name to be replaced; consulted by
+	// getFixedDisplayCountry. The first entry is the Cyrillic-script counterpart of the
+	// second ("Jugoslavija" -> "Srbija i Crna Gora").
+	static Map fixCountryNames = new HashMap(); 
+	static {
+		fixCountryNames.put("\u0408\u0443\u0433\u043E\u0441\u043B\u0430\u0432\u0438\u0458\u0430", "\u0421\u0440\u0431\u0438\u0458\u0430 \u0438 \u0426\u0440\u043D\u0430 \u0413\u043E\u0440\u0430");
+		fixCountryNames.put("Jugoslavija", "Srbija i Crna Gora");
+		fixCountryNames.put("Yugoslavia", "Serbia and Montenegro");
+	}
+	// Registers rule files as transliterators during class initialization.
+	// NOTE(review): this runs before main() calls UOption.parseArgs, so options[TZADIR].value
+	// is presumably still the default here and a command-line tzadir is ignored -- confirm.
+	static {
+		// HACK around lack of Armenian, Ethiopic
+		registerTransliteratorFromFile(options[TZADIR].value, "Latin-Armenian");
+		registerTransliteratorFromFile(options[TZADIR].value, "Latin-Ethiopic");
+		registerTransliteratorFromFile(options[TZADIR].value, "Cyrillic-Latin");
+		registerTransliteratorFromFile(options[TZADIR].value, "Arabic-Latin");		
+	}
+	// Declared after the static block above, so it is created once those registrations have run.
+	public static final Transliterator toLatin = Transliterator.getInstance("any-latin");
+	
+	/**
+	 * Reads transliteration rules from a UTF-8 file and registers them in both directions,
+	 * replacing any transliterators already registered under the same ids.
+	 * The file name is the id with '-' replaced by '_', plus ".txt".
+	 * @param dir directory containing the rule file
+	 * @param id forward id such as "Cyrillic-Latin"; the reverse id swaps the two halves.
+	 *        An id without '-' is registered as "Any-id" with reverse "id-Any".
+	 * @throws IllegalArgumentException if the file cannot be opened or read (the
+	 *         IOException is attached as the cause)
+	 */
+	static void registerTransliteratorFromFile(String dir, String id) {
+		String filename = id.replace('-', '_');
+		StringBuffer buffer = new StringBuffer();
+		try {
+			BufferedReader br = BagFormatter.openUTF8Reader(dir, filename + ".txt");
+			try {
+				while (true) {
+					String line = br.readLine();
+					if (line == null) break;
+					// drop a byte order mark at the start of a line
+					if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);
+					buffer.append(line).append("\r\n");
+				}
+			} finally {
+				// release the file even when reading fails part-way through
+				br.close();
+			}
+		} catch (IOException e) {
+			IllegalArgumentException failure = new IllegalArgumentException("Can't open " + dir + ", " + id);
+			failure.initCause(e);
+			throw failure;
+		}
+		String rules = buffer.toString();
+
+		int pos = id.indexOf('-');
+		String rid;
+		if (pos < 0) {
+			rid = id + "-Any";
+			id = "Any-" + id;
+		} else {
+			rid = id.substring(pos+1) + "-" + id.substring(0, pos);
+		}
+
+		// forward direction
+		Transliterator.unregister(id);
+		Transliterator t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
+		Transliterator.registerInstance(t);
+
+		// reverse direction, built from the same rules
+		Transliterator.unregister(rid);
+		t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
+		Transliterator.registerInstance(t);
+		System.out.println("Registered new Transliterator: " + id + ", " + rid);
+	}
+
+	/**
+	 * Renders the keys of a name-to-locale map as a comma-separated run of HTML spans.
+	 * Each span shows one native name; its title attribute holds the code and English name
+	 * and, optionally, a Latin transliteration of the native name.
+	 * @param nativeCountries map from native display name to locale identifier (used for
+	 *        languages as well as countries, despite the name)
+	 * @param transliterate if true, non-Latin names (except for language "ja") get a Latin
+	 *        transliteration appended to the title
+	 * @param isLanguage true to build titles from the language, false from the country
+	 * @return the HTML fragment
+	 */
+	private static String showSet(Map nativeCountries, boolean transliterate, boolean isLanguage) {
+		// characters with right-to-left bidi classes (R and AL)
+		UnicodeSet BIDI_R = new UnicodeSet("[[:Bidi_Class=R:][:Bidi_Class=AL:]]");
+		StringBuffer result = new StringBuffer();
+		for (Iterator it = nativeCountries.keySet().iterator(); it.hasNext();) {
+			String name = (String) it.next();
+			String locale = (String) nativeCountries.get(name);
+			String lang = locale, country = "";
+			// same trailing "_XX" split as in addCounts
+			if (locale.length() > 3 && locale.charAt(locale.length() - 3) == '_') {
+				lang = locale.substring(0, locale.length() - 3);
+				country = locale.substring(locale.length() - 2);
+			}
+
+			if (result.length() != 0) {
+				result.append(", ");
+			}
+			String title = "";
+			if (isLanguage) {
+				title = lang + ", " + new ULocale(locale).getDisplayLanguage(ULocale.ENGLISH);
+			} else {
+				title = country + ", " + getFixedDisplayCountry(new ULocale(locale), ULocale.ENGLISH);
+			}
+			if (transliterate && NON_LATIN.containsSome(name) && !lang.equals("ja")) {
+				String transName = fixedTitleCase(ULocale.ENGLISH, toLatin.transliterate(name));
+				// a result that still contains non-Latin characters is logged and left out
+				if (NON_LATIN.containsSome(transName)) {
+					log.println("Can't transliterate " + name + ": " + transName);
+				} else {
+					title += ", " + transName;
+				}
+			}
+			String before = "", after = "";
+			// title always contains at least ", " at this point, so this branch is always taken
+			if (title.length() != 0) {
+				before = "<span title=\'" + BagFormatter.toHTML.transliterate(title) + "'>";
+				after = "</span>";
+			}
+			// names containing right-to-left characters are wrapped in U+200E (LEFT-TO-RIGHT MARK)
+			boolean isBIDI = BIDI_R.containsSome(name);
+			if (isBIDI) result.append('\u200E');
+			result.append(before).append(BagFormatter.toHTML.transliterate(name)).append(after);
+			if (isBIDI) result.append('\u200E');			
+		}
+		return result.toString();
+	}
+
+	/**
+	 * Decides whether a locale's XML file is a draft: the first line containing
+	 * "&lt;ldml" is examined, and the locale is a draft if that same line also contains
+	 * "draft". Answers are cached per locale name.
+	 */
+	public static class DraftChecker {
+		String dir;
+		Map cache = new HashMap();
+		// sentinels stored in the cache in place of booleans
+		Object TRUE = new Object();
+		Object FALSE = new Object();
+
+		/**
+		 * @param dir directory holding the locale files; it is concatenated directly with
+		 *        the file name in error messages, so it should end with a separator
+		 */
+		public DraftChecker(String dir) {
+			this.dir = dir;
+		}
+
+		/**
+		 * @param localeName locale identifier; the file read is localeName + ".xml"
+		 * @return true if the line holding the ldml element mentions "draft"
+		 * @throws IllegalArgumentException if the file cannot be read or contains no
+		 *         "&lt;ldml" line
+		 */
+		public boolean isDraft(String localeName) {
+			Object check = cache.get(localeName);
+			if (check != null) {
+				return check == TRUE;
+			}
+			try {
+				BufferedReader reader = BagFormatter.openUTF8Reader(dir, localeName + ".xml");
+				try {
+					while (check == null) {
+						String line = reader.readLine();
+						if (line == null) {
+							// end of file reached without ever seeing the ldml element
+							throw new IllegalArgumentException("No <ldml> element in " + dir + localeName + ".xml");
+						}
+						if (line.indexOf("<ldml") >= 0) {
+							check = line.indexOf("draft") >= 0 ? TRUE : FALSE;
+						}
+					}
+				} finally {
+					// release the file on every path, including failures
+					reader.close();
+				}
+			} catch (IOException e) {
+				IllegalArgumentException failure =
+					new IllegalArgumentException("Failure on " + localeName + ": " + dir + localeName + ".xml");
+				failure.initCause(e);
+				throw failure;
+			}
+			cache.put(localeName, check);
+			return check == TRUE;
+		}
+	}
+
+
+	/*
    private static void compareAvailable() {
        ULocale[] cols = Collator.getAvailableULocales();
        Locale[] alocs = NumberFormat.getAvailableLocales();
@ -137,11 +424,29 @@ public class GenerateCldrTests {
    }
    */

-    /**
-     * @param sLocs
-     */
-    private static void showLocales(Set sLocs) {
-        for (Iterator it = sLocs.iterator(); it.hasNext();) {
+	/**
+	 * Logs one line per available ULocale: the locale, its English display name and its
+	 * native display name. The line starts with "FAILURE" when the language or country
+	 * lacks a localized name in English or in the locale itself.
+	 */
+	private static void checkLocaleNames() {
+		ULocale[] locales = ULocale.getAvailableLocales();
+		for (int i = 0; i < locales.length; ++i) {
+			ULocale current = locales[i];
+			boolean fullyLocalized = hasLocalizedCountryFor(ULocale.ENGLISH, current)
+					&& hasLocalizedLanguageFor(ULocale.ENGLISH, current)
+					&& hasLocalizedCountryFor(current, current)
+					&& hasLocalizedLanguageFor(current, current);
+			log.print(fullyLocalized ? "       \t" : "FAILURE\t");
+			log.println(current + "\t" + current.getDisplayName(ULocale.ENGLISH) + "\t" + current.getDisplayName(current));
+		}
+	}
+
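+	/**
+	 * Logs each entry of the set, followed by a tab and its English display language.
+	 * @param sLocs set of locale identifier strings
+	 */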
+	private static void showLocales(Set sLocs) {
+		for (Iterator it = sLocs.iterator(); it.hasNext();) {
            String s = (String) it.next();
            log.println(s + "\t" + ULocale.getDisplayLanguage(s,"en"));
        }
@ -235,13 +540,18 @@ public class GenerateCldrTests {
    }

    CldrOthers cldrOthers;
-
+    
    void generate(String pat) throws Exception {
        cldrOthers = new CldrOthers(options[SOURCEDIR].value + "main" + File.separator, pat);
        cldrOthers.show();
+
        //if (true) return;
        cldrCollations = new CldrCollations(options[SOURCEDIR].value + "collation" + File.separator, pat);
        cldrCollations.show();
+
+        cldrOthers = new CldrOthers(options[SOURCEDIR].value + "main" + File.separator, pat);
+        cldrOthers.show();
+
        getLocaleList();

        Matcher m = Pattern.compile(pat).matcher("");
@ -600,8 +910,57 @@ public class GenerateCldrTests {
            return cldrCollations.getInstance(loc1).equals(cldrCollations.getInstance(loc2)); // Collator.getInstance(loc1).equals(Collator.getInstance(loc2));
        }
    };
+    static ULocale zhHack = new ULocale("zh"); // FIXME hack for zh

    DataShower CollationShower = new DataShower() {
+		/**
+		 * Emits the collation test data for one locale: builds the set of characters to
+		 * test from the collator's tailored set plus the locale's exemplar characters,
+		 * then hands it to doCollationResult. Plain "zh" is skipped entirely.
+		 * @param locale locale whose collator is exercised
+		 * @param others passed to showLocales; otherwise only used by the disabled loop below
+		 */
+		public void show(ULocale locale, Collection others) {
+			// FIXME-style hack: nothing at all is emitted for plain "zh"
+			if (locale.equals(zhHack)) return;
+			
+			showLocales("collation", others);
+
+			Collator col = cldrCollations.getInstance(locale); // Collator.getInstance(locale);
+
+			UnicodeSet tailored = col.getTailoredSet();
+			// plain "zh" returned above, so this only applies to other locales with language zh
+			if (locale.getLanguage().equals("zh")) {
+				tailored.addAll(new UnicodeSet("[[a-z]-[v]]"));
+				log.println("HACK for Pinyin");
+			}
+			tailored = createCaseClosure(tailored);
+			tailored = nfc(tailored);
+			//System.out.println(tailored.toPattern(true));
+
+			UnicodeSet exemplars = getExemplarSet(locale, UnicodeSet.CASE);
+			// add all the exemplars
+			// (disabled: the exemplars of the other locales are not merged in)
+			if (false)
+				for (Iterator it = others.iterator(); it.hasNext();) {
+					exemplars.addAll(getExemplarSet((ULocale) it.next(),
+							UnicodeSet.CASE));
+				}
+
+			exemplars = createCaseClosure(exemplars);
+			exemplars = nfc(exemplars);
+			//System.out.println(exemplars.toPattern(true));
+			tailored.addAll(exemplars);
+			//UnicodeSet tailoredMinusHan = new
+			// UnicodeSet(tailored).removeAll(SKIP_COLLATION_SET);
+			// tailored is now a superset of exemplars; log whatever it holds beyond them
+			if (!exemplars.containsAll(tailored)) {
+				//BagFormatter bf = new BagFormatter();
+				log.println("In Tailored, but not Exemplar; Locale: " + locale
+						+ "\t" + locale.getDisplayName());
+				log.println(new UnicodeSet(tailored).removeAll(exemplars)
+						.toPattern(false));
+				//bf.(log,"tailored", tailored, "exemplars", exemplars);
+				log.flush();
+			}
+			// always include a few fixed items, then drop everything in SKIP_COLLATION_SET
+			tailored.addAll(new UnicodeSet("[\\ .02{12}]"));
+			tailored.removeAll(SKIP_COLLATION_SET);
+
+			SortedBag bag = new SortedBag(col);
+			doCollationResult(col, tailored, bag);
+			// NOTE(review): presumably closes an element opened by showLocales("collation", ...) -- confirm
+			out.println("  </collation>");
+		}
+	};
+/*
        public void show(ULocale locale, Collection others) {
        showLocales("collation", others);

@ -641,6 +1000,7 @@ public class GenerateCldrTests {
        doCollationResult(col, tailored, bag);
        out.println("  </collation>");
    }};
+*/
    static final UnicodeSet SKIP_COLLATION_SET = new UnicodeSet(
            "[[:script=han:][:script=hangul:]-[\u4e00-\u4eff \u9f00-\u9fff \uac00-\uacff \ud700-\ud7ff]]");

@ -803,7 +1163,14 @@ public class GenerateCldrTests {
        if (!locale.equals("root")) return "root";
        return null;
    }
-
+    
+    /**
+     * Replaces every occurrence of a literal pattern in a string.
+     * Matches are found left to right and do not overlap; the scan resumes
+     * immediately after each inserted replacement, so text introduced by a
+     * replacement is never matched again.
+     * @param source string to search
+     * @param pattern literal text to look for; if empty, source is returned unchanged
+     * @param replacement literal text substituted for each occurrence (may be empty)
+     * @return source with all occurrences of pattern replaced
+     */
+    public static String replace(String source, String pattern, String replacement) {
+        if (pattern.length() == 0) {
+            // an empty pattern matches at every index, so the loop below would never end
+            return source;
+        }
+        // Continue searching at pos + replacement.length(): stepping by one would skip an
+        // adjacent match when the replacement is empty, and would rescan (and loop forever)
+        // when the replacement itself contains the pattern.
+        for (int pos = source.indexOf(pattern, 0); pos >= 0; pos = source.indexOf(pattern, pos + replacement.length())) {
+            source = source.substring(0, pos) + replacement + source.substring(pos + pattern.length());
+        }
+        return source;
+    }

    static class CldrCollations {
        Set validLocales = new TreeSet();
@ -867,13 +1234,6 @@ public class GenerateCldrTests {
            }
        }

-        public static String replace(String source, String pattern, String replacement) {
-            // dumb code for now
-            for (int pos = source.indexOf(pattern, 0); pos >= 0; pos = source.indexOf(pattern, pos + 1)) {
-                source = source.substring(0, pos) + replacement + source.substring(pos+pattern.length());
-            }
-            return source;
-        }
        static Transliterator fromHex = Transliterator.getInstance("hex-any");

        private void getCollationRules(String locale) throws Exception {
@ -886,7 +1246,10 @@ public class GenerateCldrTests {
            Map types_rules = new TreeMap();
            locale_types_rules.put(locale, types_rules);
            for (Resource current = resource.first; current != null; current = current.next) {
-                //System.out.println(current.name);
+                if (current.name == null) {
+                	log.println("Collation: null name found in " + locale);
+                	continue;
+                }
                if (current instanceof ICUResourceWriter.ResourceTable) {
                    ICUResourceWriter.ResourceTable table = (ICUResourceWriter.ResourceTable) current;
                    for (Resource current2 = table.first; current2 != null; current2 = current2.next) {
@ -905,7 +1268,7 @@ public class GenerateCldrTests {
                                String rules = fromHex.transliterate(foo.val);
                                RuleBasedCollator fixed = generateCollator(locale, current.name, foo.name, rules);
                                if (fixed != null) {
-                                    log.println("Rules for: " + locale + "," + current.name);
+                                    log.println("Rules for: " + locale + ", " + current.name);
                                    log.println(rules);
                                    if (!rules.equals(foo.val)) {
                                        log.println("Original Rules from Ram: ");
--- a/icu4j/src/com/ibm/icu/dev/tool/cldr/GenerateSidewaysView.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/cldr/GenerateSidewaysView.java
@ -90,7 +90,8 @@ public class GenerateSidewaysView {
        SKIP = 5,
        TZADIR = 6,
        NONVALIDATING = 7,
-        SHOW_DTD = 8;
+        SHOW_DTD = 8,
+		TRANSLIT = 9;

    private static final String NEWLINE = "\n";

@ -104,11 +105,11 @@ public class GenerateSidewaysView {
            UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
            UOption.create("nonvalidating", 'n', UOption.NO_ARG),
            UOption.create("dtd", 'w', UOption.NO_ARG),
+            UOption.create("transliterate", 'y', UOption.NO_ARG),
    };
    private static String timeZoneAliasDir = null;

    public static void main(String[] args) throws SAXException, IOException {
-
        UOption.parseArgs(args, options);

        Matcher skipper = Pattern.compile(options[SKIP].value).matcher("");
@ -1553,6 +1554,10 @@ public class GenerateSidewaysView {
        }
        */
        void showCacheData() throws IOException {
+        	UnicodeSet untransliteratedCharacters = new UnicodeSet();
+        	Set translitErrors = new TreeSet();
+        	GenerateCldrTests.DraftChecker dc = new GenerateCldrTests.DraftChecker(options[SOURCEDIR].value);
+        	dc.isDraft("en");
            writeStyleSheet();
            PrintWriter out = null;
            String lastChainName = "";
@ -1597,18 +1602,35 @@ public class GenerateSidewaysView {
                        files.addAll(remainingFiles);
                        dataStyle = " class='nodata'";
                    }
-                    out.print("<tr><th" + dataStyle +
+
+                    String extra = "";
+                    if (data.string != null && options[TRANSLIT].doesOccur 
+                    		&& GenerateCldrTests.NON_LATIN.containsSome(data.string)) {                    	
+                    	try {
+							extra = GenerateCldrTests.toLatin.transliterate(data.string);
+	                    	untransliteratedCharacters.addAll(extra);
+	                    	if (extra.equals(data.string)) extra = "";
+	                  		else extra = "<br>(\"" + BagFormatter.toHTML.transliterate(extra) + "\")";                      	
+						} catch (RuntimeException e) {
+							translitErrors.add(e.getMessage());
+						}
+                    }
+                    out.print("<tr><th" + dataStyle + 
                            (lineCounter == 1 ? " width='20%'" : "")
-                            + ">\"" + data + "\"</th><td>");
+                            + ">\"" + data + "\""
+							+ extra
+							+ "</th><td>");
                    boolean first = true;
                    for (Iterator it3 = files.iterator(); it3.hasNext();) {
                        if (first) first = false;
                        else out.print(" ");
                        String localeID = (String)it3.next();
                        boolean emphasize = localeID.equals("root") || localeID.indexOf('_') >= 0;
+                        if (dc.isDraft(localeID)) out.print("<i>");
                        if (emphasize) out.print("<b>");
                        out.print("\u00B7" + localeID + "\u00B7");
                        if (emphasize) out.print("</b>");
+                        if (dc.isDraft(localeID)) out.print("</i>");
                    }
                    out.println("</td></tr>");
                }
@ -1622,6 +1644,15 @@ public class GenerateSidewaysView {
            }
            writeIndex();
            tripleData.writeData();
+            untransliteratedCharacters.retainAll(GenerateCldrTests.NON_LATIN);
+            log.println("Untranslated Characters*: " + untransliteratedCharacters.toPattern(false));
+            log.println("Untranslated Characters* (hex): " + untransliteratedCharacters.toPattern(true));
+            untransliteratedCharacters.closeOver(UnicodeSet.CASE);
+            log.println("Untranslated Characters: " + untransliteratedCharacters.toPattern(false));
+            log.println("Untranslated Characters (hex): " + untransliteratedCharacters.toPattern(true));
+            for (Iterator it = translitErrors.iterator(); it.hasNext();) {
+            	log.println(it.next());
+            }
        }

        /**
@ -1651,8 +1682,9 @@ public class GenerateSidewaysView {
                    "Each value is listed under the field designator (in XML XPath format), " +
                    "followed by all the locales that use it. " +
                    "Locales are omitted if the value would be the same as the parent's. " +
-                    "The locales are listed in the format: \u00B7aa\u00B7 for searching. " +
-                    "The value appears in red if it is the same as the root. </p>");
+					"The locales are listed in the format: \u00B7aa\u00B7 for searching. " +
+                    "The value appears in red if it is the same as the root. " +
+                    "Draft locales are italic-gray; territory locales are bold.</p>");             
            out.println("<table>");
            return out;
        }
@ -1661,6 +1693,7 @@ public class GenerateSidewaysView {
            out.println(".head { font-weight:bold; background-color:#DDDDFF }");
            out.println("td, th { border: 1px solid #0000FF; text-align }");
            out.println("th { width:10% }");
+            out.println("i { color: gray }");            
            out.println(".nodata { background-color:#FF0000 }");
            out.println("table {margin-top: 1em}");
            out.close();
--- a/icu4j/src/com/ibm/icu/dev/tool/cldr/TestCldr.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/cldr/TestCldr.java
@ -38,6 +38,8 @@ import com.ibm.icu.text.DateFormat;
 import com.ibm.icu.text.NumberFormat;
 import com.ibm.icu.text.SimpleDateFormat;
 import com.ibm.icu.text.Transliterator;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;

 /**
 * This is a file that runs the CLDR tests for ICU4J, to verify that ICU4J implements them
@ -201,37 +203,53 @@ public class TestCldr {

    static String[] NumberNames = {"standard", "integer", "decimal", "percent", "scientific"};

-    // ============ Handler for Collation ============
-    {
-        addHandler("collation", new Handler() {
-            public void handleResult(ULocale currentLocale, String value) {
-                Collator col = Collator.getInstance(currentLocale);
-                String lastLine = "";
-                int count = 0;
-                for (int pos = 0; pos < value.length();) {
-                    int nextPos = value.indexOf('\n', pos);
-                    if (nextPos < 0)
-                        nextPos = value.length();
-                    String line = value.substring(pos, nextPos).trim(); // HACK for SAX
-                    if (line.length() != 0) {  // HACK for SAX
-                        int comp = col.compare(lastLine, line);
-                        if (comp > 0) {
-                            failures++;
-                            logln("\tLine " + (count + 1) + "\tFailure: " + showString(lastLine) + " should be leq " + showString(line));
-                        } else if (DEBUG) {
-                            System.out.println("OK: " + line);
-                        }
-                    }
-                    pos = nextPos + 1;
-                    lastLine = line;
-                    count++;
-                }
-            }
-        });

-        // ============ Handler for Numbers ============
-        addHandler("number", new Handler() {
-            public void handleResult(ULocale locale, String result) {
+    // ============ Handler for Collation ============ 
+    static UnicodeSet controlsAndSpace = new UnicodeSet("[:cc:]");
+    
+    /**
+     * Returns a copy of the input without the code points contained in the given set.
+     * The input is read one code point at a time through UTF16.charAt, and the index
+     * advances by UTF16.getCharCount of the code point just read.
+     * @param in string to filter
+     * @param toRemove set of code points to drop
+     * @return the remaining code points, in their original order
+     */
+    static String remove(String in, UnicodeSet toRemove) {
+    	StringBuffer kept = new StringBuffer();
+    	int index = 0;
+    	while (index < in.length()) {
+    		int codePoint = UTF16.charAt(in, index);
+    		if (!toRemove.contains(codePoint)) {
+    			UTF16.append(kept, codePoint);
+    		}
+    		index += UTF16.getCharCount(codePoint);
+    	}
+    	return kept.toString();
+    }
+
+    {
+		addHandler("collation", new Handler() {
+			public void handleResult(ULocale currentLocale, String value) {
+				Collator col = Collator.getInstance(currentLocale);
+				String lastLine = "";
+				int count = 0;
+				for (int pos = 0; pos < value.length();) {
+					int nextPos = value.indexOf('\n', pos);
+					if (nextPos < 0)
+						nextPos = value.length();
+					String line = value.substring(pos, nextPos);
+					line = remove(line, controlsAndSpace); // HACK for SAX
+					if (line.trim().length() != 0) { // HACK for SAX
+						int comp = col.compare(lastLine, line);
+						if (comp > 0) {
+							failures++;
+							logln("\tLine " + (count + 1) + "\tFailure: "
+									+ showString(lastLine) + " should be leq "
+									+ showString(line));
+						} else if (DEBUG) {
+							System.out.println("OK: " + line);
+						}
+						lastLine = line;
+					}
+					pos = nextPos + 1;
+					count++;
+				}
+			}
+		});
+        
+        // ============ Handler for Numbers ============ 
+		addHandler("number", new Handler() {
+			public void handleResult(ULocale locale, String result) {
                NumberFormat nf = null;
                double v = Double.NaN;
                for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
@ -314,9 +314,10 @@ public class MakeUnicodeFiles {
                            addValueComments(property, value, comments);
                        comments = "";
                        if (line.startsWith("Generate:")) {
-                            filesToDo = Utility.split(lineValue, ' ');
-                            if (filesToDo.length == 0) {
-                                filesToDo = new String[] {""};
+                            filesToDo = Utility.split(lineValue.trim(), ' ');
+                            if (filesToDo.length == 0
+                            		|| (filesToDo.length == 1 && filesToDo[0].length() == 0)) {
+                                filesToDo = new String[] {".*"};
                            }
                        } else if (line.startsWith("DeltaVersion:")) {
                            dVersion = Integer.parseInt(lineValue);
@ -476,24 +477,22 @@ public class MakeUnicodeFiles {
    }
    
    public static void generateFile() throws IOException {
-    	String[] lines = new String[2];
-    	Utility.filesAreIdentical("C:\\DATA\\UCD\\4.0.1-Update\\CaseFolding-4.0.1.txt", 
-    			"C:\\DATA\\GEN\\DerivedData\\CaseFolding-4.1.0d13.txt", lines);
        for (int i = 0; i < Format.theFormat.filesToDo.length; ++i) {
-            String fileName =
-                Format.theFormat.filesToDo[i].trim().toLowerCase(
-                    Locale.ENGLISH);
+            String fileNamePattern =
+                Format.theFormat.filesToDo[i].trim(); // .toLowerCase(Locale.ENGLISH);
+            Matcher matcher = Pattern.compile(fileNamePattern, Pattern.CASE_INSENSITIVE).matcher("");
            Iterator it = Format.theFormat.getFiles().iterator();
            boolean gotOne = false;
            while (it.hasNext()) {
                String propname = (String) it.next();
-                if (!propname.toLowerCase(Locale.ENGLISH).startsWith(fileName)) continue;
+                if (!matcher.reset(propname).matches()) continue;
+                //if (!propname.toLowerCase(Locale.ENGLISH).startsWith(fileName)) continue;
                generateFile(propname);
                gotOne = true;
            }
            if (!gotOne) {
                throw new IllegalArgumentException(
-                    "Non-matching file name: " + fileName);
+                    "Non-matching file name: " + fileNamePattern);
            }
        }
    }
@ -715,7 +714,8 @@ public class MakeUnicodeFiles {
        List propList = Format.theFormat.getPropertiesFromFile(filename);
        for (Iterator propIt = propList.iterator(); propIt.hasNext();) {
             BagFormatter bf = new BagFormatter(toolFactory);
-             UnicodeProperty prop = toolFactory.getProperty((String) propIt.next());
+             String nextPropName = (String) propIt.next();
+             UnicodeProperty prop = toolFactory.getProperty(nextPropName);
             String name = prop.getName();
             System.out.println("Property: " + name + "; " + prop.getTypeName(prop.getType()));
            pw.println();
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
@ -1,6 +1,18 @@
-Generate:
+Generate: 
 DeltaVersion: 7

+File: GraphemeClusterBreakProperty
+Property: Grapheme_Cluster_Break
+Format:	skipValue=Other
+
+File: WordBreakProperty
+Property: Word_Break
+Format:	skipValue=Other
+
+File: SentenceBreakProperty
+Property: Sentence_Break
+Format:	skipValue=Other
+
 File:	Blocks
 Property: Block
 # Note:   When comparing block names, casing, whitespace, hyphens,
@ -41,6 +53,9 @@ Value:	3.2
 Value:	4.0
 # Newly assigned in Unicode 4.0.0 (April, 2003)

+Value:	4.1
+# Newly assigned in Unicode 4.1.0 (XXX, 2005)
+
 File:	extracted/DerivedBidiClass
 Property:	Bidi_Class
 # Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)
@ -86,7 +101,7 @@ Property:	ID_Start
 Property:	ID_Continue
 # Derived Property: ID_Continue
 #  Characters that can continue an identifier.
-#  Generated from: ID_Start + Mn+Mc+Nd+Pc
+#  Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue
 #  NOTE: Cf characters should be filtered out.


@ -109,7 +124,8 @@ Property:	XID_Continue

 Property:	Default_Ignorable_Code_Point
 # Derived Property: Default_Ignorable_Code_Point
-#  Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs + Noncharacters - White_Space - Annotation_characters
+#  Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs + Noncharacters
+#  - White_Space - FFF9..FFFB (Annotation Characters)

 Property:	Grapheme_Extend
 # Derived Property: Grapheme_Extend
@ -307,6 +323,9 @@ Property:	Other_ID_Continue
 Property:	STerm

 Property:	Variation_Selector
+Property:	Pattern_White_Space
+Property:	Pattern_Syntax
+

 File:	PropertyAliases
 Property: SPECIAL
@ -315,7 +334,6 @@ File:	PropertyValueAliases
 Property: SPECIAL

 File:	Scripts
-
 Property:	Script
 Format:	nameStyle=none skipUnassigned=Common

--- a/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java
@ -12,6 +12,7 @@ import java.util.TreeSet;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
 import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UnicodeSet;
 import com.ibm.text.utility.Utility;

 public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
@ -49,8 +50,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
                if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
                return ucd.getName(codepoint);
            }
-        }.setMain("Name", "na", UnicodeProperty.MISC, version)
-        .setValues("<string>"));
+        }.setValues("<string>")
+		.setMain("Name", "na", UnicodeProperty.MISC, version));
        
        add(new UnicodeProperty.SimpleProperty() {
            public String _getValue(int codepoint) {
@ -63,24 +64,24 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
            protected UnicodeMap _getUnicodeMap() {
                return ucd.blockData;
            }
-        }.setMain("Block", "blk", UnicodeProperty.CATALOG, version)
-        .setValues(ucd.getBlockNames(null)));
+        }.setValues(ucd.getBlockNames(null))
+		.setMain("Block", "blk", UnicodeProperty.CATALOG, version));
        
        add(new UnicodeProperty.SimpleProperty() {
            public String _getValue(int codepoint) {
                //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
                return ucd.getBidiMirror(codepoint);
            }
-        }.setMain("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, version)
-        .setValues("<string>"));
+        }.setValues("<string>")
+		.setMain("Bidi_Mirroring_Glyph", "bmg", UnicodeProperty.STRING, version));
        
        add(new UnicodeProperty.SimpleProperty() {
            public String _getValue(int codepoint) {
                //if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
                return ucd.getCase(codepoint,UCD_Types.FULL,UCD_Types.FOLD);
            }
-        }.setMain("Case_Folding", "cf", UnicodeProperty.STRING, version)
-        .setValues("<string>"));
+        }.setValues("<string>")
+		.setMain("Case_Folding", "cf", UnicodeProperty.STRING, version));
        
        add(new UnicodeProperty.SimpleProperty() {
            NumberFormat nf = NumberFormat.getInstance();
@ -121,8 +122,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
            public int getMaxWidth(boolean isShort) {
                return 15;
            }
-        }.setMain("NFD_Quick_Check", "NFD_QC", UnicodeProperty.ENUMERATED, version)
-        .setValues(LONG_YES_NO, YES_NO));
+        }.setValues(LONG_YES_NO, YES_NO)
+		.setMain("NFD_Quick_Check", "NFD_QC", UnicodeProperty.ENUMERATED, version));
        
        add(new UnicodeProperty.SimpleProperty() {
            public String _getValue(int codepoint) {
@ -133,8 +134,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
            public int getMaxWidth(boolean isShort) {
                return 15;
            }
-        }.setMain("NFC_Quick_Check", "NFC_QC", UnicodeProperty.ENUMERATED, version)
-        .setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE));
+        }.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE)
+		.setMain("NFC_Quick_Check", "NFC_QC", UnicodeProperty.ENUMERATED, version));
        
        add(new UnicodeProperty.SimpleProperty() {
            public String _getValue(int codepoint) {
@ -145,8 +146,8 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
            public int getMaxWidth(boolean isShort) {
                return 15;
            }
-        }.setMain("NFKD_Quick_Check", "NFKD_QC", UnicodeProperty.ENUMERATED, version)
-        .setValues(LONG_YES_NO, YES_NO));
+        }.setValues(LONG_YES_NO, YES_NO)
+		.setMain("NFKD_Quick_Check", "NFKD_QC", UnicodeProperty.ENUMERATED, version));
        
        add(new UnicodeProperty.SimpleProperty() {
            public String _getValue(int codepoint) {
@ -157,8 +158,11 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
            public int getMaxWidth(boolean isShort) {
                return 15;
            }
-        }.setMain("NFKC_Quick_Check", "NFKC_QC", UnicodeProperty.ENUMERATED, version)
-        .setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE));
+        }.setValues(LONG_YES_NO_MAYBE, YES_NO_MAYBE)
+		.setMain("NFKC_Quick_Check", "NFKC_QC", UnicodeProperty.ENUMERATED, version));
+
+
+
        
        /*
        add(new UnicodeProperty.SimpleProperty() {
@ -206,7 +210,103 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
            if (DEBUG) System.out.println("Iterated Names: " + name);
            add(new ToolUnicodeProperty(name));
        }
-        
+
+		add(new UnicodeProperty.UnicodeMapProperty() {
+        	{
+        		unicodeMap = new UnicodeMap();
+        		unicodeMap.setErrorOnReset(true);
+        		unicodeMap.put(0xD, "CR");
+        		unicodeMap.put(0xA, "LF");
+        		UnicodeProperty cat = getProperty("General_Category");
+        		UnicodeSet temp = cat.getSet("Line_Separator")
+				.addAll(cat.getSet("Paragraph_Separator"))
+				.addAll(cat.getSet("Control"))
+				.addAll(cat.getSet("Format"))
+				.remove(0xD).remove(0xA).remove(0x200C).remove(0x200D);
+        		unicodeMap.putAll(temp, "Control");
+        		UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
+        		unicodeMap.putAll(graphemeExtend,"Extend");
+        		UnicodeProperty hangul = getProperty("Hangul_Syllable_Type");
+        		unicodeMap.putAll(hangul.getSet("L"),"L");
+        		unicodeMap.putAll(hangul.getSet("V"),"V");
+        		unicodeMap.putAll(hangul.getSet("T"),"T");
+        		unicodeMap.putAll(hangul.getSet("LV"),"LV");
+        		unicodeMap.putAll(hangul.getSet("LVT"),"LVT");
+        		unicodeMap.setMissing("Other");
+        	}
+        }.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version));
+
+        add(new UnicodeProperty.UnicodeMapProperty() {
+        	{
+        		unicodeMap = new UnicodeMap();
+        		unicodeMap.setErrorOnReset(true);
+        		UnicodeProperty cat = getProperty("General_Category");
+        		unicodeMap.putAll(cat.getSet("Format").remove(0x200C).remove(0x200D), "Format");
+        		UnicodeProperty script = getProperty("Script");
+        		unicodeMap.putAll(script.getSet("Katakana")
+        				.addAll(new UnicodeSet("[\u3031\u3032\u3033\u3034\u3035\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]"))
+						, "Katakana");
+        		Object foo = unicodeMap.getSet("Katakana");
+        		UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
+        		unicodeMap.putAll(getProperty("Alphabetic").getSet("true")
+        				.add(0xA0).add(0x05F3)
+						.removeAll(getProperty("Ideographic").getSet("true"))
+						.removeAll(unicodeMap.getSet("Katakana"))
+						.removeAll(script.getSet("Thai"))
+						.removeAll(script.getSet("Lao"))
+						.removeAll(script.getSet("Hiragana"))
+						.removeAll(graphemeExtend),
+						"ALetter");
+        		unicodeMap.putAll(new UnicodeSet("[\\u0027\\u00B7\\u05F4\\u2019\\u2027\\u003A]")
+								,"MidLetter");
+        		UnicodeProperty lineBreak = getProperty("Line_Break");
+        		unicodeMap.putAll(lineBreak.getSet("Infix_Numeric")
+        				.remove(0x003A), "MidNum");
+        		unicodeMap.putAll(lineBreak.getSet("Numeric"), "Numeric");
+        		unicodeMap.putAll(cat.getSet("Connector_Punctuation").remove(0x30FB).remove(0xFF65), "Numeric");
+        		unicodeMap.putAll(graphemeExtend, "Other"); // to verify that none of the above touch it.
+        		unicodeMap.setMissing("Other");
+        	}
+        }.setMain("Word_Break", "WB", UnicodeProperty.ENUMERATED, version));
+
+        add(new UnicodeProperty.UnicodeMapProperty() {
+        	{
+        		unicodeMap = new UnicodeMap();
+        		unicodeMap.setErrorOnReset(true);
+        		unicodeMap.putAll(new UnicodeSet("[\\u000A\\u000D\\u0085\\u2028\\u2029]"), "Sep");
+        		UnicodeProperty cat = getProperty("General_Category");
+        		unicodeMap.putAll(cat.getSet("Format").remove(0x200C).remove(0x200D), "Format");
+        		unicodeMap.putAll(getProperty("Whitespace").getSet("true")
+        				.removeAll(unicodeMap.getSet("Sep"))
+						.remove(0xA0), "Sp");
+        		UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
+        		unicodeMap.putAll(getProperty("Lowercase").getSet("true")
+        				.removeAll(graphemeExtend), "Lower");
+        		unicodeMap.putAll(getProperty("Uppercase").getSet("true")
+        				.addAll(cat.getSet("Titlecase_Letter"))
+						, "Upper");
+        		UnicodeSet temp = getProperty("Alphabetic").getSet("true")
+				.add(0xA0).add(0x5F3)
+				.removeAll(unicodeMap.getSet("Lower"))
+		        .removeAll(unicodeMap.getSet("Upper"))
+				.removeAll(graphemeExtend);
+        		unicodeMap.putAll(temp, "OLetter");
+        		UnicodeProperty lineBreak = getProperty("Line_Break");
+        		unicodeMap.putAll(lineBreak.getSet("Numeric"), "Numeric");
+        		unicodeMap.put(0x002E, "ATerm");
+        		unicodeMap.putAll(getProperty("STerm").getSet("true")
+        				.removeAll(unicodeMap.getSet("ATerm")), "STerm");
+        		unicodeMap.putAll(cat.getSet("Open_Punctuation")
+        				.addAll(cat.getSet("Close_Punctuation"))
+        				.addAll(lineBreak.getSet("Quotation"))
+						.remove(0x05F3)
+						.removeAll(unicodeMap.getSet("ATerm"))
+						.removeAll(unicodeMap.getSet("STerm"))
+						, "Close");
+        		unicodeMap.putAll(graphemeExtend, "Other"); // to verify that none of the above touch it.
+        		unicodeMap.setMissing("Other");
+        	}
+        }.setMain("Sentence_Break", "SB", UnicodeProperty.ENUMERATED, version));
    }
    
    static String[] YES_NO_MAYBE = {"N", "M", "Y"};
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2004/11/12 23:17:15 $
-* $Revision: 1.35 $
+* $Date: 2004/11/13 23:10:32 $
+* $Revision: 1.36 $
 *
 *******************************************************************************
 */
@ -1579,8 +1579,13 @@ to guarantee identifier closure.

                //T = Mc + (Cf - ZWNJ - ZWJ)
                int cp = uData.codePoint;
-                    byte old = uData.joiningType;
-                    byte cat = uData.generalCategory;
+                byte old = uData.joiningType;
+                byte cat = uData.generalCategory;
+                if (cat == Me) {
+                	if (compositeVersion >= 0x40100) {
+                		uData.joiningType = JT_T; 
+                	}
+                }
                //if (cp == 0x200D) {
                  //  uData.joiningType = JT_C;
                //} else
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
-* $Date: 2004/11/12 23:17:15 $
-* $Revision: 1.28 $
+* $Date: 2004/11/13 23:10:32 $
+* $Revision: 1.29 $
 *
 *******************************************************************************
 */
@ -154,6 +154,8 @@ final class UCD_Names implements UCD_Types {
        "STerm",
        "Variation_Selector",
        "Other_ID_Continue",
+		"Pattern_White_Space",
+		"Pattern_Syntax"
    };

    static final String[] SHORT_BP = {
@ -191,7 +193,9 @@ final class UCD_Names implements UCD_Types {
        "OIDS",
        "STerm",
        "VS",
-        "OIDC"
+        "OIDC",
+		"PatWS",
+		"PatSyn"
    };

    /*
@ -253,7 +257,7 @@ final class UCD_Names implements UCD_Types {
        "CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB",
        "SA", "AI", "B2", "SG", "ZW",
        "NL",
-        "WJ",
+        "WJ", "JL", "JV", "JT", "H2", "H3"
        //"JL",
        //"JV",
        //"JT",
@ -269,7 +273,7 @@ final class UCD_Names implements UCD_Types {
        "MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
        "ComplexContext", "Ambiguous", "BreakBoth", "Surrogate", "ZWSpace",
        "Next_Line",
-        "Word_Joiner"
+        "Word_Joiner", "JL", "JV", "JT", "H2", "H3"
        //"Leading_Jamo",
        //"Vowel_Jamo",
        //"Trailing_Jamo",
@ -412,7 +416,8 @@ final class UCD_Names implements UCD_Types {
    "3.0",
    "3.1",
    "3.2",
-    "4.0"
+    "4.0",
+	"4.1",
  };


--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2004/03/11 19:03:16 $
-* $Revision: 1.28 $
+* $Date: 2004/11/13 23:10:32 $
+* $Revision: 1.29 $
 *
 *******************************************************************************
 */
@ -15,7 +15,7 @@ package com.ibm.text.UCD;

 public interface UCD_Types {
    
-    static final byte BINARY_FORMAT = 15; // bumped if binary format of UCD changes. Forces rebuild   
+    static final byte BINARY_FORMAT = 16; // bumped if binary format of UCD changes. Forces rebuild   
    
    public static final String BASE_DIR = "C:\\DATA\\";
    public static final String UCD_DIR = BASE_DIR + "UCD\\";
@ -214,7 +214,9 @@ public interface UCD_Types {
        Sentence_Terminal = 32,
        Variation_Selector = 33,
        Other_ID_Continue = 34,
-	    LIMIT_BINARY_PROPERTIES = 35;
+        Pattern_White_Space = 35,
+        Pattern_Syntax = 36,
+	    LIMIT_BINARY_PROPERTIES = 37;

 	/*
    static final int
@ -247,10 +249,15 @@ public interface UCD_Types {
        LB_SA = 24, LB_AI = 25, LB_B2 = 26, LB_SG = 27, LB_ZW = 28,
        LB_NL = 29,
        LB_WJ = 30,
+        LB_JL = 31,
+        LB_JV = 32,
+        LB_JT = 33,
+        LB_H2 = 34,
+        LB_H3 = 35,
        //LB_JL = 29,
        //LB_JV = 30,
        //LB_JT = 31,
-        LIMIT_LINE_BREAK = 31,
+        LIMIT_LINE_BREAK = 36,
        LB_LIMIT = LIMIT_LINE_BREAK;

    // east asian width
@ -394,7 +401,8 @@ public interface UCD_Types {
    AGE31 = 5,
    AGE32 = 6,
    AGE40 = 7,
-    LIMIT_AGE = 8;
+    AGE41 = 8,
+    LIMIT_AGE = 9;

    static final String[] AGE_VERSIONS = {
        "?",
@ -404,7 +412,8 @@ public interface UCD_Types {
        "3.0.0",
        "3.1.0",
        "3.2.0",
-        "4.0.0"       
+        "4.0.0",  
+        "4.1.0"       
    };

 public static byte
--- a/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
@ -71,7 +71,7 @@ $Alphabetic = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_Alphabetic]
 $Lowercase = [$GC:Ll $Other_Lowercase]
 $Uppercase = [$GC:Lu $Other_Uppercase]
 $ID_Start = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_ID_Start]
-$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc]
+$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc $Other_ID_Continue]
 $Default_Ignorable_Code_Point = [[$Other_Default_Ignorable_Code_Point $GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]
 $Grapheme_Extend = [$GC:Me $GC:Mn $Other_Grapheme_Extend]
 $Grapheme_Base = [^$GC:Cc $GC:Cf $GC:Cs $GC:Co $GC:Cn $GC:Zl $GC:Zp $Grapheme_Extend]
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2004/11/12 23:17:15 $
-* $Revision: 1.45 $
+* $Date: 2004/11/13 23:10:32 $
+* $Revision: 1.46 $
 *
 *******************************************************************************
 */
@ -673,6 +673,7 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES

    private static final String[] searchPath = {
        "EXTRAS",
+        "4.1.0",
        "4.0.1",
        "4.0.0",
        "3.2.0",