ICU-0 updates for security

X-SVN-Rev: 17966
2005-06-21 21:28:31 +00:00 · 2005-06-21 21:28:31 +00:00 · 4f2a64c207
commit 4f2a64c207
parent 2847e795d6
5 changed files with 1100 additions and 109 deletions
--- a/tools/unicodetools/com/ibm/text/UCD/CheckICU.java
+++ b/tools/unicodetools/com/ibm/text/UCD/CheckICU.java
@ -3,6 +3,7 @@ package com.ibm.text.UCD;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@ -19,7 +20,9 @@ import com.ibm.icu.dev.test.util.UnicodeLabel;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
 import com.ibm.icu.dev.test.util.ICUPropertyFactory;
 import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.ULocale;
 import com.ibm.text.utility.Utility;

 public class CheckICU {
@ -51,8 +54,8 @@ public class CheckICU {
    
 
    public static void test() throws IOException {
-        //generateFile("4.0.0", "DerivedCombiningClass");
-        //generateFile("4.0.0", "DerivedCoreProperties");
+        checkAvailable();
+        if (true) return;
        checkUCD();
        itemFailures = new UnicodeSet();
        icuFactory = ICUPropertyFactory.make();
@ -86,7 +89,86 @@ public class CheckICU {
        }
    }

-    private static void checkUCD() throws IOException {
+    /**
+	 * Prints every collation keyword=value pair, then the collation variants of a few hard-coded test locales.
+	 */
+	private static void checkAvailable() {
+		//generateFile("4.0.0", "DerivedCombiningClass");
+        //generateFile("4.0.0", "DerivedCoreProperties");
+    	ULocale[] locales = Collator.getAvailableULocales();
+    	
+    	System.out.println("Collation");
+    	System.out.println("Possible keyword=values pairs:");
+    	{
+	    	String[] keywords = Collator.getKeywords();
+	    	for (int i = 0; i < keywords.length; ++i) { // bound by the array that is actually indexed
+	    		String[] values = Collator.getKeywordValues(keywords[i]);
+	    		for (int j = 0; j < values.length; ++j) {
+	    			System.out.println("\t" + keywords[i] + "=" + values[j]);
+	    		}
+	    	}
+    	}
+    	System.out.println("Differing Collators:");
+    	Set testSet = new HashSet(Arrays.asList(new String[] {
+    		"nl", "de", "de_DE", "zh_TW"
+    	}));
+    	for (int k = 0; k < locales.length; ++k) {
+    		if (!testSet.contains(locales[k].toString())) continue; // report only the test locales
+			showCollationVariants(locales[k]);
+    	}
+	}
+
+	/**
+	 * Prints, per collation keyword and value, the functional-equivalent locale; flags values whose result differs from the base.
+	 */
+	private static void showCollationVariants(ULocale locale) {
+		String[] keywords = Collator.getKeywords();
+		System.out.println(locale.getDisplayName(ULocale.ENGLISH) + " [" + locale + "]");
+		for (int i = 0; i < keywords.length; ++i) {
+			ULocale base = Collator.getFunctionalEquivalent(keywords[i], 
+					locale
+					//new ULocale(locale + "@" + keywords[i] + "=standard")
+					);
+			if (true) System.out.println("\"" + base + "\" == Collator.getFunctionalEquivalent(\"" + keywords[i] + "\", \"" + locale + "\");");
+			String[] values = Collator.getKeywordValues(keywords[i]);
+			for (int j = 0; j < values.length; ++j) {
+				ULocale other = Collator.getFunctionalEquivalent(keywords[i], 
+						new ULocale(locale + "@" + keywords[i] + "=" + values[j]));
+				if (true) System.out.println(
+						"\"" + other
+						+ "\" == Collator.getFunctionalEquivalent(\"" + keywords[i]
+						+ "\", new ULocale(\""
+						+ locale + "@" + keywords[i] + "=" + values[j] + "\"));");
+				// HACK: commented line should work but doesn't
+				if (!other.equals(base)) {
+				//if (other.toString().indexOf("@") >= 0) {
+					System.out.println("\t" + keywords[i] + "=" + values[j] + "; \t" + base + "; \t" + other);
+				}
+			}
+		}
+	}
+
+/**
+ * Sample code that prints out the variants that 'make a difference' for a given locale.
+ * To iterate through the locales, use Collator.getAvailableULocales().
+ */
+private static void showCollationVariants2(ULocale locale) {
+	String[] keywords = Collator.getKeywords();
+	System.out.println(locale.getDisplayName(ULocale.ENGLISH) + " [" + locale + "]");
+	for (int i = 0; i < keywords.length; ++i) {
+		ULocale base = Collator.getFunctionalEquivalent(keywords[i], locale);
+		String[] values = Collator.getKeywordValues(keywords[i]);
+		for (int j = 0; j < values.length; ++j) {
+			ULocale other = Collator.getFunctionalEquivalent(keywords[i], 
+					new ULocale(locale + "@" + keywords[i] + "=" + values[j]));
+			if (!other.equals(base)) { // this keyword value changes the collator actually used
+				System.out.println("\t" + keywords[i] + "=" + values[j] + "; \t" + base + "; \t" + other);
+			}
+		}
+	}
+}
+
+	private static void checkUCD() throws IOException {
        UCD myUCD = UCD.make("4.0.0");
        Normalizer nfc = new Normalizer(Normalizer.NFC, "4.0.0");
        UnicodeSet leading = new UnicodeSet();
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
@ -290,7 +290,7 @@ class GenerateStringPrep implements UCD_Types {
 	/**
 	 * 
 	 */
-	private int getIDNAType(int cp) {
+	static public int getIDNAType(int cp) {
 		inbuffer.setLength(0);
 		UTF16.append(inbuffer, cp);
 		try {
@ -310,8 +310,8 @@ class GenerateStringPrep implements UCD_Types {
 			return REMAPPED;
 		return OK;
 	}
-	StringBuffer inbuffer = new StringBuffer();
-	StringBuffer intermediate, outbuffer;
+	static StringBuffer inbuffer = new StringBuffer();
+	static StringBuffer intermediate, outbuffer;

 	UnicodeSet lowercase = new UnicodeSet("[:Lowercase:]");

--- a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
-* $Date: 2005/06/08 01:44:48 $
-* $Revision: 1.4 $
+* $Date: 2005/06/21 21:28:31 $
+* $Revision: 1.5 $
 *
 *******************************************************************************
 */
@ -15,6 +15,7 @@ package com.ibm.text.UCD;

 import java.util.*;
 import java.io.*;
+
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;

@ -22,6 +23,24 @@ import com.ibm.text.utility.*;

 public class QuickTest implements UCD_Types {
 	static final void test() {
+		String test2 = "ab\u263ac";
+		StringTokenizer st = new StringTokenizer(test2, "\u263a");
+		try {
+			while (true) {
+				String s = st.nextToken();
+				System.out.println(s);
+			}
+		} catch (Exception e) {		}
+		StringReader r = new StringReader(test2);
+		StreamTokenizer s = new StreamTokenizer(r);
+		try {
+			while (true) {
+				int x = s.nextToken();
+				if (x == StreamTokenizer.TT_EOF) break;
+				System.out.println(s.sval);
+			}
+		} catch (Exception e) {		}
+		
 		String testString = "en-Arab-200-gaulish-a-abcd-def-x-abcd1234-12345678";
 		for (int i = testString.length() + 1; i > 0; --i) {
 			String trunc = truncateValidLanguageTag(testString, i);
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2005/05/27 21:39:03 $
-* $Revision: 1.49 $
+* $Date: 2005/06/21 21:28:31 $
+* $Revision: 1.50 $
 *
 *******************************************************************************
 */
@ -898,14 +898,19 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
        }
        if (!unique || !set.contains(value)) set.add(value);
    }
-        
+
    public static String readDataLine(BufferedReader br) throws IOException {
+    	return readDataLine(br, null); // null count: the overload skips its per-line counter increment
+    }
+    
+    public static String readDataLine(BufferedReader br, int[] count) throws IOException {
        String originalLine = "";
        String line = "";
        
        try {
            line = originalLine = br.readLine();
            if (line == null) return null;
+            if (count != null) ++count[0];
            if (line.length() > 0 && line.charAt(0) == 0xFEFF) line = line.substring(1);
            int commentPos = line.indexOf('#');
            if (commentPos >= 0) line = line.substring(0, commentPos);