fixes for 3.2

X-SVN-Rev: 8130
2002-03-20 00:21:43 +00:00 · 2002-03-20 00:21:43 +00:00 · 1660406201
commit 1660406201
parent 79d29d4e37
14 changed files with 345 additions and 109 deletions
--- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.5 $
+* $Date: 2002/03/20 00:21:43 $
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -25,7 +25,7 @@ import java.io.*;
 */

 public final class ConvertUCD implements UCD_Types {
-    public static final boolean SHOW = true;
+    public static final boolean SHOW = false;
    public static final boolean DEBUG = false;

    public static int major;
@ -201,7 +201,7 @@ public final class ConvertUCD implements UCD_Types {
    // MAIN!!

    public static void main (String[] args) throws Exception {
-        System.out.println("ConvertUCD");
+        System.out.println("Building binary version of UCD");

        log = new PrintWriter(new BufferedWriter(
            new OutputStreamWriter(
@ -260,8 +260,17 @@ public final class ConvertUCD implements UCD_Types {
            UData value = (UData) charData.get(key);
            value.compact();
        }
-        UData ud = getEntry(0x2A6D6);
+        
+        UData ud;
+        ud = getEntry(0x5e);
+        System.out.println("SPOT-CHECK: 5e: " + ud);
+        
+        ud = getEntry(0x130);
+        System.out.println("SPOT-CHECK: 130: " + ud);
+        
+        ud = getEntry(0x2A6D6);
        System.out.println("SPOT-CHECK: 2A6D6: " + ud);
+        
        ud = getEntry(0xFFFF);
        System.out.println("SPOT-CHECK: FFFF: " + ud);

@ -493,7 +502,16 @@ public final class ConvertUCD implements UCD_Types {
                                if (type.equals("I")) {
                                    data.simpleCaseFolding = val;
                                    setBinaryProperty(cps, CaseFoldTurkishI);
-                                    System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
+                                    System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " 
+                                    	+ Utility.hex(cps) + ": " + Utility.hex(val));
+                                }
+                            } else if (labels[0].equals("SpecialCasing")   // special handling for special casing
+                            			&& labels[4].equals("sc")
+                                		&& parts[4].trim().length() > 0) {
+                                if (i < 4) {
+                                	if (DEBUG) System.out.println("Got special: " + Utility.hex(cps) + ", " 
+                                		+ Utility.hex(key) + ":" + Utility.hex(val));
+                                	addCharData(cps, "sc", parts[4].trim() + ":" + key + ":" + val);
                                }
                            } else {
                                /*if (key.equals("sn")) { // SKIP UNDEFINED!!
@ -782,12 +800,16 @@ public final class ConvertUCD implements UCD_Types {
            } else if (fieldName.equals("su")) {
                uData.fullUppercase = fieldValue;
            } else if (fieldName.equals("sl")) {
+            	if (DEBUG) System.out.println("Setting full lowercase to " + Utility.hex(fieldValue) + uData);
                uData.fullLowercase = fieldValue;
            } else if (fieldName.equals("st")) {
                uData.fullTitlecase = fieldValue;

            } else if (fieldName.equals("sc")) {
-                uData.specialCasing = fieldValue;
+            	if (uData.specialCasing.length() > 0) {
+            		uData.specialCasing += ";";
+            	}
+                uData.specialCasing += fieldValue;

            } else if (fieldName.equals("xp")) {
                uData.binaryProperties |= 1 << Utility.lookup(fieldValue, UCD_Names.BP, true);
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.11 $
+* $Date: 2002/03/20 00:21:43 $
+* $Revision: 1.12 $
 *
 *******************************************************************************
 */
@ -285,6 +285,11 @@ public final class DerivedProperty implements UCD_Types {
            else if (nfx.isTrailing(cp)) return MAYBE;
            else return "";
        }
+        
+		public String getListingValue(int cp) {
+    		return getValue(cp, LONG);
+    	}
+        
        boolean hasValue(int cp) { return getValue(cp).length() != 0; }
    };

@ -460,6 +465,12 @@ of characters, the first of which has a non-zero combining class.
                if (isCompEx(cp)) return true;
                return false;
            }
+            /*
+			public String getListingValue(int cp) {
+        		if (getValueType() != BINARY) return getValue(cp, SHORT);
+        		return getProperty(SHORT);
+			}
+			*/
        };
        
        dprops[FullCompInclusion] = new UnicodeProperty() {
@ -537,37 +548,15 @@ of characters, the first of which has a non-zero combining class.
                hasUnassigned = true;
                shortName = "DI";
                header = header = "# Derived Property: " + name
-                    + "\r\n#  Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs - White_Space";
+                    + "\r\n#  Generated from <2060..206F, FFF0..FFFB, E0000..E0FFF>"
+                    + "\r\n#    + Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)";
            }
            boolean hasValue(int cp) {
+            	if (0x2060 <= cp && cp <= 0x206F || 0xFFF0 <= cp && cp <= 0xFFFB || 0xE0000 <= cp && cp <= 0xE0FFF) return true;
+                if (ucdData.getBinaryProperty(cp,Other_Default_Ignorable_Code_Point)) return true;
                if (ucdData.getBinaryProperty(cp, White_space)) return false;
                byte cat = ucdData.getCategory(cp);
-                if (cat == Cf || cat == Cs || cat == Cc
-                || ucdData.getBinaryProperty(cp,Reserved_Cf_Code_Point)) return true;
-                return false;
-            }
-        };
-
-/*
-        GraphemeExtend = 27,
-        GraphemeBase = 28,
-# GraphemeExtend := Me + Mn + Mc + Other_GraphemeExtend - GraphemeLink
-# GraphemeBase := 
-
-*/
-        dprops[GraphemeExtend] = new UnicodeProperty() {
-            {
-                type = DERIVED_CORE;
-                name = "Grapheme_Extend";
-                shortName = "GrExt";
-                header = header = "# Derived Property: " + name
-                    + "\r\n#  Generated from: Me + Mn + Mc + Other_Grapheme_Extend - Grapheme_Link";
-            }
-            boolean hasValue(int cp) {
-                if (ucdData.getBinaryProperty(cp, GraphemeExtend)) return false;
-                byte cat = ucdData.getCategory(cp);
-                if (cat == Me || cat == Mn || cat == Mc
-                || ucdData.getBinaryProperty(cp,Other_GraphemeExtend)) return true;
+                if (cat == Cf || cat == Cs || cat == Cc) return true;
                return false;
            }
        };
@ -576,6 +565,7 @@ of characters, the first of which has a non-zero combining class.
            {
                name = "Other_Case_Ignorable";
                shortName = "OCI";
+                isStandard = false;
                
                header = header = "# Binary Property";
            }
@ -608,7 +598,7 @@ of characters, the first of which has a non-zero combining class.
            }
            boolean hasValue(int cp) {
                if (hasSoftDot(cp)) return true;
-                if (!Main.nfkd.hasDecomposition(cp)) return false;
+                if (!Main.nfkd.normalizationDiffers(cp)) return false;
                String decomp = Main.nfd.normalize(cp);
                boolean ok = false;
                for (int i = decomp.length()-1; i >= 0; --i) {
@ -630,6 +620,7 @@ of characters, the first of which has a non-zero combining class.
        dprops[Case_Ignorable] = new UnicodeProperty() {
            {
                name = "Case_Ignorable";
+                isStandard = false;
                shortName = "CI";
                header = header = "# Derived Property: " + name
                    + "\r\n#  Generated from: Other_Case_Ignorable + Lm + Mn + Me + Cf";
@ -642,6 +633,33 @@ of characters, the first of which has a non-zero combining class.
            }
        };
        
+/*
+        GraphemeExtend = 27,
+        GraphemeBase = 28,
+# GraphemeExtend := Me + Mn + Mc + Other_GraphemeExtend - GraphemeLink
+# GraphemeBase := 
+
+*/
+        dprops[GraphemeExtend] = new UnicodeProperty() {
+            {
+                type = DERIVED_CORE;
+                name = "Grapheme_Extend";
+                shortName = "GrExt";
+                header = header = "# Derived Property: " + name
+                    + "\r\n#  Generated from: Me + Mn + Mc + Other_Grapheme_Extend - Grapheme_Link - CGJ"
+                    + "\r\n#  (CGJ = U+034F)";
+                     
+            }
+            boolean hasValue(int cp) {
+            	if (cp == 0x034F) return false;
+                if (ucdData.getBinaryProperty(cp, GraphemeLink)) return false;
+                byte cat = ucdData.getCategory(cp);
+                if (cat == Me || cat == Mn || cat == Mc
+                || ucdData.getBinaryProperty(cp,Other_GraphemeExtend)) return true;
+                return false;
+            }
+        };
+
        dprops[GraphemeBase] = new UnicodeProperty() {
            {
                type = DERIVED_CORE;
@ -649,9 +667,11 @@ of characters, the first of which has a non-zero combining class.
                shortName = "GrBase";
                
                header = header = "# Derived Property: " + name
-                    + "\r\n#  Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Link - Grapheme_Extend";
+                    + "\r\n#  Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp"
+                    + "\r\n#    - Grapheme_Extend - Grapheme_Link - CGJ";
            }
            boolean hasValue(int cp) {
+            	if (cp == 0x034F) return false;
                byte cat = ucdData.getCategory(cp);
                if (cat == Cc || cat == Cf || cat == Cs || cat == Co || cat == Cn || cat == Zl || cat == Zp
                || ucdData.getBinaryProperty(cp,GraphemeLink)) return false;
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.9 $
+* $Date: 2002/03/20 00:21:43 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -56,8 +56,7 @@ final class DerivedPropertyLister extends PropertyLister {
    }

    public String valueName(int cp) {
-        if (uprop.getValueType() != BINARY) return uprop.getValue(cp, LONG);
-        return uprop.getProperty(LONG);
+    	return uprop.getListingValue(cp);
    }

    //public String optionalComment(int cp) {
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.6 $
+* $Date: 2002/03/20 00:21:43 $
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -24,6 +24,8 @@ public class GenerateCaseFolding implements UCD_Types {
    public static boolean COMMENT_DIFFS = false; // ON if we want a comment on mappings != lowercase
    public static boolean PICK_SHORT = false; // picks short value for SIMPLE if in FULL, changes weighting
    public static boolean NF_CLOSURE = false; // picks short value for SIMPLE if in FULL, changes weighting
+    static final int CHECK_CHAR = 0x130; // for debugging, change to actual character, otherwise -1
+     
    // PICK_SHORT & NF_CLOSURE = false for old style
    
    
@ -83,8 +85,14 @@ public class GenerateCaseFolding implements UCD_Types {
            if (rFull != null && rFull.equals(rSimple) 
              || (PICK_SHORT && UTF16.countCodePoint(rFull) == 1)) {
                String type = "C";
-                if (ch == 0x130 || ch == 0x131) type = "I";
-                drawLine(out, ch, type, rFull);
+                if (ch == 0x130) {
+                	drawLine(out, ch, "F", "i\u0307");
+                	drawLine(out, ch, "I", "\u0130");
+                } else if (ch == 0x131) {
+                	drawLine(out, ch, "I", "i");
+                } else {
+                	drawLine(out, ch, type, rFull);
+                }
            } else {
                if (rFull != null) {
                    drawLine(out, ch, "F", rFull);
@ -404,7 +412,7 @@ public class GenerateCaseFolding implements UCD_Types {
    }
    
    static boolean isExcluded(int ch) {
-        if (ch == 0x130) return true;                  // skip LATIN CAPITAL LETTER I WITH DOT ABOVE
+        // if (ch == 0x130) return true;                  // skip LATIN CAPITAL LETTER I WITH DOT ABOVE
        if (ch == 0x0132 || ch == 0x0133) return true; // skip IJ, ij
        if (ch == 0x037A) return true;                 // skip GREEK YPOGEGRAMMENI
        if (0x249C <= ch && ch <= 0x24B5) return true; // skip PARENTHESIZED LATIN SMALL LETTER A..
@ -456,7 +464,7 @@ public class GenerateCaseFolding implements UCD_Types {
                btitle = Main.nfc.normalize(btitle);
            }
            
-            if (ch == -1) {// for debugging, change to actual character
+            if (ch == CHECK_CHAR) {
                System.out.println("Code: " + Main.ucd.getCodeAndName(ch));
                System.out.println("Decomp: " + Main.ucd.getCodeAndName(decomp));
                System.out.println("Base: " + Main.ucd.getCodeAndName(base));
@ -474,11 +482,17 @@ public class GenerateCaseFolding implements UCD_Types {
            // presumably if there is a single code point, it would already be in the simple mappings
            
            if (UTF16.countCodePoint(flower) == 1 && UTF16.countCodePoint(fupper) == 1 
-                && UTF16.countCodePoint(title) == 1) continue;
+                	&& UTF16.countCodePoint(title) == 1) {
+            	if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Main.ucd.getCodeAndName(ch));
+            	continue;
+            }
            
            // if there is no change from the base, skip
            
-            if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) continue;
+            if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) {
+            	if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Main.ucd.getCodeAndName(ch));
+            	continue;
+            }
            
            // fix special cases
            // if (flower.equals(blower) && fupper.equals(bupper) && ftitle.equals(btitle)) continue;
@ -488,20 +502,26 @@ public class GenerateCaseFolding implements UCD_Types {
            
            // if there are no changes from the original, or the expanded original, skip
            
-            if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) continue;
+            if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) {
+            	if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Main.ucd.getCodeAndName(ch));
+            	continue;
+            }
            
            String name = Main.ucd.getName(ch);
            
            int order = name.equals("LATIN SMALL LETTER SHARP S") ? 1
-                : name.indexOf("ARMENIAN SMALL LIGATURE") >= 0 ? 3
-                : name.indexOf("LIGATURE") >= 0 ? 2
-                : name.indexOf("GEGRAMMENI") < 0 ? 4
-                : UTF16.countCodePoint(ftitle) == 1 ? 5
-                : UTF16.countCodePoint(fupper) == 2 ? 6
-                : 7;
+                : ch == 0x130 ? 2
+                : name.indexOf("ARMENIAN SMALL LIGATURE") >= 0 ? 4
+                : name.indexOf("LIGATURE") >= 0 ? 3
+                : name.indexOf("GEGRAMMENI") < 0 ? 5
+                : UTF16.countCodePoint(ftitle) == 1 ? 6
+                : UTF16.countCodePoint(fupper) == 2 ? 7
+                : 8;
+            
+            if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Main.ucd.getCodeAndName(ch));
            
            // HACK
-            boolean denormalize = !normalize && order != 5 && order != 6;
+            boolean denormalize = !normalize && order != 6 && order != 7;
            
            String mapping = Utility.hex(ch)
                + "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Main.nfd.normalize(flower) : flower)
@ -544,12 +564,15 @@ public class GenerateCaseFolding implements UCD_Types {
                    out.println("# The German es-zed is special--the normal mapping is to SS.");
                    out.println("# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))");
                    break;
-                case 2: out.println("# Ligatures"); break;
-                case 3: skipLine = true; break;
-                case 4: out.println("# No corresponding uppercase precomposed character"); break;
-                case 5: Utility.appendFile("SpecialCasingIota.txt", true, out); break;
-                case 6: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break;
-                case 7: skipLine = true; break;
+                case 2:
+                    out.println("# Preserve canonical equivalence for I with dot. Turkic is handled below.");
+					break;                	
+                case 3: out.println("# Ligatures"); break;
+                case 4: skipLine = true; break;
+                case 5: out.println("# No corresponding uppercase precomposed character"); break;
+                case 6: Utility.appendFile("SpecialCasingIota.txt", true, out); break;
+                case 7: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break;
+                case 8: skipLine = true; break;
                }
                if (!skipLine) out.println();
            }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.15 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.16 $
 *
 *******************************************************************************
 */
@ -1183,7 +1183,7 @@ public class GenerateData implements UCD_Types {
            Utility.dot(i);
            if (!Main.ucd.isRepresented(i)) continue;
            
-            if (!Main.nfd.hasDecomposition(i)) {
+            if (!Main.nfd.normalizationDiffers(i)) {
                if (Main.ucd.getScript(i) == LATIN_SCRIPT) {
                    int cp = i;
                    String hex = "u" + Utility.hex(cp, 4);
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.9 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -77,8 +77,11 @@ public final class Main implements UCD_Types {
            } else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{ucdVersion});
            else if (arg.equalsIgnoreCase("version")) ucdVersion = args[++i];
            else if (arg.equalsIgnoreCase("testskippable")) NFSkippable.main(null);
+            else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
            else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
            else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
+            else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
+            
            else if (arg.equalsIgnoreCase("generateHanTransliterator")) GenerateHanTransliterator.main();
            else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();

--- a/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
-* $Date: 2001/12/13 23:35:57 $
-* $Revision: 1.7 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@ -85,7 +85,7 @@ final class MyPropertyLister extends PropertyLister {

        if (cat == Cn
            && propMask != (BINARY_PROPERTIES | Noncharacter_Code_Point)
-            && propMask != (BINARY_PROPERTIES | Reserved_Cf_Code_Point)
+            && propMask != (BINARY_PROPERTIES | Other_Default_Ignorable_Code_Point)
            && propMask != (CATEGORY | Cn)) {
            if (BRIDGE) return CONTINUE;
            else return EXCLUDE;
--- a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.7 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@ -67,6 +67,13 @@ public final class Normalizer implements UCD_Types {
        return getName(form);
    }

+    /**
+     * Return the version string of the UCD data backing this normalizer
+     */
+    public String getUCDVersion() {
+        return data.getUCDVersion();
+    }
+
    /**
     * Does compose?
     */
@ -120,7 +127,6 @@ public final class Normalizer implements UCD_Types {
    }

    /**
-    */
    private StringBuffer hasDecompositionBuffer = new StringBuffer();

    public boolean hasDecomposition(int cp) {
@ -129,6 +135,7 @@ public final class Normalizer implements UCD_Types {
        if (hasDecompositionBuffer.length() != 1) return true;
        return cp != hasDecompositionBuffer.charAt(0);
    }
+    */

    /**
     * Does a quick check to see if the string is in the current form. Checks canonical order and
@ -427,6 +434,11 @@ public final class Normalizer implements UCD_Types {
                if (ucd.
            */
        }
+        
+        String getUCDVersion() {
+        	return ucd.getVersion();
+        }
+        
        /*
 Problem: differs: true, call: false U+0385 GREEK DIALYTIKA TONOS
 Problem: differs: true, call: false U+03D3 GREEK UPSILON WITH ACUTE AND HOOK SYMBOL
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
@ -48,10 +48,14 @@
 # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
 # The following rules handle those cases.

+0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+
 # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
 # This matches the behavior of the canonically equivalent I-dot_above

-0307; ; 0307; 0307; After_Soft_Dotted; # COMBINING DOT ABOVE
+0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
+0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE

 # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.

@ -63,7 +67,6 @@
 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I

-# Note: the following cases are already in the UnicodeData file.
+# Note: the following case is already in the UnicodeData file.

 # 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
-# 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2001/12/13 23:35:57 $
-* $Revision: 1.9 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -1027,6 +1027,19 @@ to guarantee identifier closure.
    }

    private void fillFromFile(String version) {
+    	try {
+    		fillFromFile2(version);
+    	} catch (ChainException e) {
+    		try {
+    			ConvertUCD.main(new String[]{version});
+    		} catch (Exception e2) {
+            	throw new ChainException("Can't build data file for {0}", new Object[]{version}, e2);
+    		}
+    		fillFromFile2(version);
+    	}
+    }
+    
+    private void fillFromFile2(String version) {
        DataInputStream dataIn = null;
        String fileName = BIN_DIR + "UCD_Data" + version + ".bin";
        int uDataFileCount = 0;
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.12 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.13 $
 *
 *******************************************************************************
 */
@ -636,6 +636,7 @@ final class UCD_Names implements UCD_Types {
        "TEH_MARBUTA",
        "TETH",
        "WAW",
+        "SYRIAC WAW",
        "YEH",
        "YEH_BARREE",
        "YEH_WITH_TAIL",
@ -652,21 +653,21 @@ final class UCD_Names implements UCD_Types {
        "BEH",
        "BETH",
        "DAL",
-        "DALATH RISH",
+        "DALATH_RISH",
        "E",
        "FEH",
-        "FINAL SEMKATH",
+        "FINAL_SEMKATH",
        "GAF",
        "GAMAL",
        "HAH",
-        "HAMZA ON HEH GOAL",
+        "HAMZA_ON_HEH_GOAL",
        "HE",
        "HEH",
-        "HEH GOAL",
+        "HEH_GOAL",
        "HETH",
        "KAF",
        "KAPH",
-        "KNOTTED HEH",
+        "KNOTTED_HEH",
        "LAM",
        "LAMADH",
        "MEEM",
@ -677,23 +678,24 @@ final class UCD_Names implements UCD_Types {
        "QAF",
        "QAPH",
        "REH",
-        "REVERSED PE",
+        "REVERSED_PE",
        "SAD",
        "SADHE",
        "SEEN",
        "SEMKATH",
        "SHIN",
-        "SWASH KAF",
+        "SWASH_KAF",
        "TAH",
        "TAW",
-        "TEH MARBUTA",
+        "TEH_MARBUTA",
        "TETH",
        "WAW",
+        "SYRIAC WAW",
        "YEH",
-        "YEH BARREE",
-        "YEH WITH TAIL",
+        "YEH_BARREE",
+        "YEH_WITH_TAIL",
        "YUDH",
-        "YUDH HE",
+        "YUDH_HE",
        "ZAIN",
    };

--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.9 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -15,7 +15,7 @@ package com.ibm.text.UCD;

 public interface UCD_Types {
    
-    public static final int dVersion = 7; // change to fix the generated file D version. If less than zero, no "d"
+    public static final int dVersion = 8; // change to fix the generated file D version. If less than zero, no "d"
    
    public static final String BASE_DIR = "C:\\DATA\\";
    public static final String UCD_DIR = BASE_DIR + "UCD\\";
@ -23,7 +23,7 @@ public interface UCD_Types {
    public static final String GEN_DIR = BASE_DIR + "GEN\\";


-    static final byte BINARY_FORMAT = 5; // bumped if binary format of UCD changes
+    static final byte BINARY_FORMAT = 6; // bumped if binary format of UCD changes
    
    // Unicode Property Types
    static final byte 
@ -188,7 +188,7 @@ public interface UCD_Types {
        IDS_TrinaryOperator = 24,
        Radical = 25,
        UnifiedIdeograph = 26,
-        Reserved_Cf_Code_Point = 27,
+        Other_Default_Ignorable_Code_Point = 27,
        Deprecated = 28,
        Soft_Dotted = 29,
        Logical_Order_Exception = 30,
@ -407,13 +407,14 @@ public static byte
    TEH_MARBUTA = 41,
    TETH = 42,
    WAW = 43,
-    YEH = 44,
-    YEH_BARREE = 45,
-    YEH_WITH_TAIL = 46,
-    YUDH = 47,
-    YUDH_HE = 48,
-    ZAIN = 49,
-    LIMIT_JOINING_GROUP = 50;
+    SYRIAC_WAW = 44,
+    YEH = 45,
+    YEH_BARREE = 46,
+    YEH_WITH_TAIL = 47,
+    YUDH = 48,
+    YUDH_HE = 49,
+    ZAIN = 50,
+    LIMIT_JOINING_GROUP = 51;
    
    static final byte NFD = 0, NFC = 1, NFKD = 2, NFKC = 3;    
    public static final int
--- a/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java
@ -137,6 +137,14 @@ public abstract class UnicodeProperty implements UCD_Types {
            }
      }
      
+      /**
+       * Returns the value name used in property listings; subclasses may override (special hack for NFD/NFKD)
+       */
+		public String getListingValue(int cp) {
+        	if (getValueType() != BINARY) return getValue(cp, LONG);
+        	return getProperty(LONG);
+		}
+      
      /**
       * Does it have the propertyValue?
       */
--- a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.10 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.11 $
 *
 *******************************************************************************
 */
@ -674,12 +674,12 @@ can help you narrow these down.
            if (cp == 0x3131) {
                System.out.println("Debug: " + idnProhibited
                    + ", " + idnUnassigned
-                    + ", " + Main.nfkc.hasDecomposition(cp)
+                    + ", " + Main.nfkd.normalizationDiffers(cp)
                    + ", " + Main.ucd.getCodeAndName(Main.nfkc.normalize(cp))
                    + ", " + Main.ucd.getCodeAndName(Main.nfc.normalize(cp)));
            } 
            
-            if (!idnProhibited && ! idnUnassigned && Main.nfkc.hasDecomposition(cp)) {
+            if (!idnProhibited && ! idnUnassigned && Main.nfkd.normalizationDiffers(cp)) {
                String kc = Main.nfkc.normalize(cp);
                String c = Main.nfc.normalize(cp);
                if (kc.equals(c)) continue;
@ -1045,6 +1045,47 @@ E0020-E007F; [TAGGING CHARACTERS]
        }
        return result;
    }
+    
+    /*
+                    + "\r\n#  Generated from <2060..206F, FFF0..FFFB, E0000..E0FFF>"
+                    + "\r\n#    + Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)";
+    */
+    
+    /**
+     * Prints each stage of building (Cf + Cc + Cs - White_space) + 2060..206F, FFF0..FFFB, E0000..E0FFF,
+     * then prints what is left of Other_Default_Ignorable_Code_Point after removing that set from it.
+     */
+    public static void diffIgnorable () {
+        Main.setUCD();
+        // Stage 1: start from category Cf.
+        UnicodeSet derived = UnifiedBinaryProperty.make(CATEGORY + Cf, Main.ucd).getSet();
+        System.out.println("Cf");
+        Utility.showSetNames("", derived, false, Main.ucd);
+        // Stage 2: add category Cc.
+        UnicodeSet ccSet = UnifiedBinaryProperty.make(CATEGORY + Cc, Main.ucd).getSet();
+        derived.addAll(ccSet);
+        System.out.println("Cf + Cc");
+        Utility.showSetNames("", derived, false, Main.ucd);
+        // Stage 3: add category Cs.
+        UnicodeSet csSet = UnifiedBinaryProperty.make(CATEGORY + Cs, Main.ucd).getSet();
+        derived.addAll(csSet);
+        System.out.println("Cf + Cc + Cs");
+        Utility.showSetNames("", derived, false, Main.ucd);
+        // Stage 4: take out everything with the White_space binary property.
+        UnicodeSet whiteSpace = UnifiedBinaryProperty.make(BINARY_PROPERTIES + White_space, Main.ucd).getSet();
+        derived.removeAll(whiteSpace);
+        System.out.println("Cf + Cc + Cs - WhiteSpace");
+        Utility.showSetNames("", derived, false, Main.ucd);
+        // Stage 5: add the three fixed code point ranges.
+        derived.add(0x2060,0x206f).add(0xFFF0,0xFFFB).add(0xE0000,0xE0FFF);
+        System.out.println("(Cf + Cc + Cs - WhiteSpace) + ranges");
+        Utility.showSetNames("", derived, false, Main.ucd);
+        // Stage 6: whatever Other_Default_Ignorable_Code_Point holds beyond the derived set.
+        UnicodeSet leftover = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Other_Default_Ignorable_Code_Point, Main.ucd).getSet();
+        leftover.removeAll(derived);
+        System.out.println("Minimal Default Ignorable Code Points");
+        Utility.showSetNames("", leftover, true, Main.ucd);
+    }


    public static void IdentifierTest() {
@ -1241,6 +1282,95 @@ E0020-E007F; [TAGGING CHARACTERS]
        if (cat == Lu || cat == Lt || cat == Ll) return "LC";
        return Main.ucd.getCategoryID(cp);
    }
+    
+    static public void verifyNormalizationStability() { // runs the stability check against two older UCD versions
+        Main.setUCD();
+		verifyNormalizationStability2("3.1.0"); // the version string is handed to UCD.make by the callee
+		verifyNormalizationStability2("3.0.0");
+    }
+    
+    static public void verifyNormalizationStability2(String version) {
+        // Compares the current data (Main.ucd, Main.nf*) with the UCD named by version, one code point at a time.
+        Main.nfd.normalizationDiffers(0x10300);
+        // NOTE(review): the result of the call above is discarded -- presumably a warm-up or leftover debugging; confirm.
+        UCD older = UCD.make(version); // Main.ucd.getPreviousVersion();
+        
+        Normalizer oldNFC = new Normalizer(Normalizer.NFC, older.getVersion());
+        Normalizer oldNFD = new Normalizer(Normalizer.NFD, older.getVersion());
+        Normalizer oldNFKC = new Normalizer(Normalizer.NFKC, older.getVersion());
+        Normalizer oldNFKD = new Normalizer(Normalizer.NFKD, older.getVersion());
+        
+        System.out.println("Testing " + Main.nfd.getUCDVersion() + " against " + oldNFD.getUCDVersion());
+        // Walk every code point; skip those unassigned in the current UCD and those in category Cs or PRIVATE_USE.
+        for (int i = 0; i <= 0x10FFFF; ++i) {
+        	Utility.dot(i);
+            if (!Main.ucd.isAssigned(i)) continue;
+            byte cat = Main.ucd.getCategory(i);
+            if (cat == Cs || cat == PRIVATE_USE) continue;
+            // Debug trace for code point 5E: print its NFKD form under the new and the old data.
+            if (i == 0x5e) {
+            	System.out.println("debug");
+            	String test1 = Main.nfkd.normalize(i);
+            	String test2 = oldNFKD.normalize(i);
+        		System.out.println("Testing (new/old)" + Main.ucd.getCodeAndName(i));
+    			System.out.println("\t" + Main.ucd.getCodeAndName(test1));
+    			System.out.println("\t" + Main.ucd.getCodeAndName(test2));
+            }
+            // Assigned in the older version as well: combining class and all four forms are compared.
+            if (older.isAssigned(i)) {
+            	// A combining class mismatch is only reported; the scan continues.
+            	int newCan = Main.ucd.getCombiningClass(i);
+            	int oldCan = older.getCombiningClass(i);
+            	if (newCan != oldCan) {
+            		System.out.println("FAILS CCC STABILITY: " + newCan + " != " + oldCan
+            			+ "; " + Main.ucd.getCodeAndName(i));
+            	}
+            	// Each form is normalized with the new and the old data; verifyEquals reports any difference.
+            	verifyEquals(i, "NFD STABILITY (new/old)", Main.nfd.normalize(i), oldNFD.normalize(i));
+            	verifyEquals(i, "NFC STABILITY (new/old)", Main.nfc.normalize(i), oldNFC.normalize(i));
+            	verifyEquals(i, "NFKD STABILITY (new/old)", Main.nfkd.normalize(i), oldNFKD.normalize(i));
+            	verifyEquals(i, "NFKC STABILITY (new/old)", Main.nfkc.normalize(i), oldNFKC.normalize(i));
+            	
+            } else {
+            	// Not assigned in the older version.
+            	// (1) If its NFD form differs from it, and noneHaveCategory finds no Cn (per the older UCD) in that form, NFC of the form must NOT give back this character.
+            	if (Main.nfd.normalizationDiffers(i)) {
+            		String decomp = Main.nfd.normalize(i);
+            		if (noneHaveCategory(decomp, Cn, older)) {
+            			String recomp = Main.nfc.normalize(decomp);
+            			if (recomp.equals(UTF16.valueOf(i))) {
+        					Utility.fixDot();
+            				System.out.println("FAILS COMP STABILITY: " + Main.ucd.getCodeAndName(i));
+    						System.out.println("\t" + Main.ucd.getCodeAndName(decomp));
+    						System.out.println("\t" + Main.ucd.getCodeAndName(recomp));
+    						System.out.println();
+    						throw new IllegalArgumentException("Comp stability"); // unlike the other checks, the first failure ends the scan
+            			}
+            		}
+            	}
+            }
+        }
+    }
+    
+    /** Returns true iff no code point of s has category cat according to ucd (so true for an empty s). */
+    public static boolean noneHaveCategory(String s, byte cat, UCD ucd) {
+    	int cp;
+    	for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
+    		cp = UTF16.charAt(s, i);
+    		if (ucd.getCategory(cp) == cat) return false; // look up the code point itself, not the UTF-16 offset i
+    	}
+    	return true;
+    }
+    
+    /** Does nothing when a.equals(b); otherwise prints a "FAILS message" report for cp, followed by a and b. */
+    public static void verifyEquals(int cp, String message, String a, String b) {
+    	if (a.equals(b)) return;
+    	Utility.fixDot();
+    	System.out.println("FAILS " + message + ": " + Main.ucd.getCodeAndName(cp));
+    	System.out.println("\t" + Main.ucd.getCodeAndName(a));
+    	System.out.println("\t" + Main.ucd.getCodeAndName(b));
+    	System.out.println();
+    }

    public static void checkAgainstUInfo() {
    /*