scuffed-code/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java

package com.ibm.text.UCD;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;

import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.UnicodeDataFile;

/**
 * Writes property listing files (for example {@code Scripts}, {@code PropList},
 * {@code DerivedAge}) into the {@code DerivedDataTest/} output location.
 *
 * <p>The set of files and the properties listed in each one are registered in
 * {@link #contents}; per-property formatting options are registered in
 * {@link #printStyles}. The actual line formatting is delegated to
 * {@code BagFormatter}.
 */
public class MakeUnicodeFiles {

    /** When true, verbose diagnostic dumps are printed to {@code System.out}. */
    static boolean DEBUG = true;

    /**
     * Generates every registered file whose name sorts at or after
     * {@code "Scripts"} (upper bound {@code "z"}).
     *
     * @throws IOException if writing an output file fails
     */
    public static void main() throws IOException {
        generateFile("Scripts", "z");
    }

    /**
     * Minimal map that also remembers the order in which keys were first added.
     */
    static class OrderedMap {
        HashMap map = new HashMap();
        ArrayList keys = new ArrayList();

        /**
         * Associates {@code t} with {@code o}. A key that is put more than once
         * keeps its original position and appears only once in {@link #keyset()}.
         */
        void put(Object o, Object t) {
            // Record the key only the first time, otherwise keyset() would
            // contain duplicates after a value is replaced.
            if (!map.containsKey(o)) {
                keys.add(o);
            }
            map.put(o, t);
        }

        /** Returns the keys in first-insertion order (live list, not a copy). */
        List keyset() {
            return keys;
        }
    }

    /**
     * Formatting options for one property. All setters return {@code this} so
     * that calls can be chained.
     */
    static class PrintStyle {
        /** Keep the value name as returned by the property instead of its shortest alias. */
        boolean longForm = false;
        /** Clear the formatter's label source before printing. */
        boolean noLabel = false;
        /** Upper-case the printed value name. */
        boolean makeUppercase = false;
        /** Lower-case the first letter of the printed value name. */
        boolean makeFirstLetterLowercase = false;
        /** Value that is not printed at all; {@code null} means none. */
        String skipValue = null;
        /** Value from whose set the unassigned code points are removed; {@code null} means none. */
        String skipUnassigned = null;
        /** Order the values with {@link RangeStartComparator} instead of the property's own order. */
        boolean orderByRangeStart = false;
        /** Print the whole code point range in one listing instead of one section per value. */
        boolean valueList = false;

        PrintStyle setLongForm(boolean value) {
            longForm = value;
            return this;
        }

        PrintStyle setSkipUnassigned(String value) {
            skipUnassigned = value;
            return this;
        }

        PrintStyle setNoLabel(boolean value) {
            noLabel = value;
            return this;
        }

        PrintStyle setMakeUppercase(boolean value) {
            makeUppercase = value;
            return this;
        }

        PrintStyle setMakeFirstLetterLowercase(boolean value) {
            makeFirstLetterLowercase = value;
            return this;
        }

        PrintStyle setSkipValue(String value) {
            skipValue = value;
            return this;
        }

        PrintStyle setOrderByRangeStart(boolean value) {
            orderByRangeStart = value;
            return this;
        }

        PrintStyle setValueList(boolean value) {
            valueList = value;
            return this;
        }
    }

    /** Style used for any property that has no entry in {@link #printStyles}. */
    static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle();

    /** Comparator used to match value names against the skip settings. */
    static Comparator skeletonComparator = new UnicodeProperty.SkeletonComparator();

    /**
     * Property name to {@link PrintStyle}. Keys use natural String ordering and
     * are looked up with the exact string returned by {@code prop.getName()};
     * the skeleton comparator is not used for the lookup.
     */
    static Map printStyles = new TreeMap(/*skeletonComparator*/);
    static {
        printStyles.put("Script", new PrintStyle().setLongForm(true)
            .setMakeUppercase(true).setSkipUnassigned("Common"));
        // "Age" used to be registered twice; the second put() replaced the first
        // and silently dropped noLabel. Both settings are combined here.
        printStyles.put("Age", new PrintStyle().setNoLabel(true)
            .setSkipValue("unassigned"));
        printStyles.put("Numeric_Type", new PrintStyle().setLongForm(true)
            .setMakeFirstLetterLowercase(true).setSkipUnassigned("none"));
        printStyles.put("General_Category", new PrintStyle().setNoLabel(true)
            //.setSkipUnassigned(true)
        );
        printStyles.put("Line_Break", new PrintStyle().setSkipUnassigned("Unknown"));
        printStyles.put("Joining_Type", new PrintStyle().setSkipValue("Non_Joining"));
        printStyles.put("Joining_Group", new PrintStyle().setSkipValue("No_Joining_Group")
            .setMakeUppercase(true));
        printStyles.put("East_Asian_Width", new PrintStyle().setSkipUnassigned("Neutral"));
        printStyles.put("Decomposition_Type", new PrintStyle().setLongForm(true)
            .setSkipValue("None").setMakeFirstLetterLowercase(true));
        printStyles.put("Bidi_Class", new PrintStyle().setSkipUnassigned("Left_To_Right"));
        printStyles.put("Block", new PrintStyle().setNoLabel(true)
            .setValueList(true));
        printStyles.put("Canonical_Combining_Class", new PrintStyle().setSkipValue("0"));
        printStyles.put("Hangul_Syllable_Type", new PrintStyle().setSkipValue("NA"));
    }

    // Other UCD file names, listed here but not registered in contents:
    //PropertyAliases
    //PropertyValueAliases
    //CompositionExclusions
    //SpecialCasing
    //NormalizationTest
    //add("CaseFolding", new String[] {"CaseFolding"});

    /** Output file name to the {@code String[]} of property names listed in that file. */
    static Map contents = new TreeMap();

    /** Registers the properties that are written to the file called {@code name}. */
    static void add(String name, String[] properties) {
        contents.put(name, properties);
    }

    static {
        add("Blocks", new String[] {"Block"});
        add("DerivedAge", new String[] {"Age"});
        add("Scripts", new String[] {"Script"});
        add("HangulSyllableType", new String[] {"HangulSyllableType"});
        // Disabled in the original source; kept so the property list is not lost.
        if (false) add("DerivedNormalizationProps", new String[] {
            "FNC", "Full_Composition_Exclusion",
            "NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck",
            "Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"});

        add("DerivedBidiClass", new String[] {"BidiClass"});
        add("DerivedBinaryProperties", new String[] {"BidiMirrored"});
        add("DerivedCombiningClass", new String[] {"CanonicalCombiningClass"});
        add("DerivedDecompositionType", new String[] {"DecompositionType"});
        add("DerivedEastAsianWidth", new String[] {"EastAsianWidth"});
        add("DerivedGeneralCategory", new String[] {"GeneralCategory"});
        add("DerivedJoiningGroup", new String[] {"JoiningGroup"});
        add("DerivedJoiningType", new String[] {"JoiningType"});
        add("DerivedLineBreak", new String[] {"LineBreak"});
        add("DerivedNumericType", new String[] {"NumericType"});
        add("DerivedNumericValues", new String[] {"NumericValue"});
        add("PropList", new String[] {
            "White_Space", "Bidi_Control", "Join_Control",
            "Dash", "Hyphen", "Quotation_Mark",
            "Terminal_Punctuation", "Other_Math",
            "Hex_Digit", "ASCII_Hex_Digit",
            "Other_Alphabetic",
            "Ideographic",
            "Diacritic", "Extender",
            "Other_Lowercase", "Other_Uppercase",
            "Noncharacter_Code_Point",
            "Other_Grapheme_Extend",
            "Grapheme_Link",
            "IDS_Binary_Operator", "IDS_Trinary_Operator",
            "Radical", "Unified_Ideograph",
            "Other_Default_Ignorable_Code_Point",
            "Deprecated", "Soft_Dotted",
            "Logical_Order_Exception",
            "Other_ID_Start"
        });
        add("DerivedCoreProperties", new String[] {
            "Math", "Alphabetic", "Lowercase", "Uppercase",
            "ID_Start", "ID_Continue",
            "XID_Start", "XID_Continue",
            "Default_Ignorable_Code_Point",
            "Grapheme_Extend", "Grapheme_Base"
        });
    }

    /**
     * Generates every registered file whose name lies in the inclusive range
     * {@code [atOrAfter, atOrBefore]} under {@link String#compareTo(String)}.
     *
     * @param atOrAfter  lowest file name to generate
     * @param atOrBefore highest file name to generate
     * @throws IOException if writing an output file fails
     */
    public static void generateFile(String atOrAfter, String atOrBefore) throws IOException {
        for (Iterator it = contents.keySet().iterator(); it.hasNext();) {
            String propname = (String) it.next();
            if (propname.compareTo(atOrAfter) < 0) {
                continue;
            }
            if (propname.compareTo(atOrBefore) > 0) {
                continue;
            }
            generateFile(propname);
        }
    }

    /**
     * Writes the file registered under {@code filename}: for each of its
     * properties a separator, followed by the code point listing produced by
     * {@code BagFormatter} according to the property's {@link PrintStyle}.
     *
     * @param filename a key previously registered with {@link #add(String, String[])}
     * @throws IllegalArgumentException if {@code filename} is not registered
     * @throws IOException if writing the output file fails
     */
    public static void generateFile(String filename) throws IOException {
        String[] propList = (String[]) contents.get(filename);
        if (propList == null) {
            // Fail before the output file is opened rather than with a bare
            // NullPointerException after a header has already been written.
            throw new IllegalArgumentException("No properties registered for file: " + filename);
        }
        UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);
        try {
            PrintWriter pw = udf.out; // bf2.openUTF8Writer(UCD_Types.GEN_DIR, "Test" + filename + ".txt");
            UnicodeProperty.Factory toolFactory
                = ToolUnicodePropertySource.make(Default.ucdVersion());
            BagFormatter bf2 = new BagFormatter(toolFactory);
            // Code points with gc=cn or gc=cs; removed from the set of a
            // property's skipUnassigned value before it is printed.
            UnicodeSet unassigned = toolFactory.getSet("gc=cn")
                .addAll(toolFactory.getSet("gc=cs"));
            //System.out.println(unassigned.toPattern(true));
            // .removeAll(toolFactory.getSet("noncharactercodepoint=true"));
            String separator = bf2.getLineSeparator()
                + "# ================================================"
                + bf2.getLineSeparator() + bf2.getLineSeparator();

            for (int i = 0; i < propList.length; ++i) {
                UnicodeProperty prop = toolFactory.getProperty(propList[i]);
                System.out.println(prop.getName());
                pw.print(separator);
                PrintStyle ps = (PrintStyle) printStyles.get(prop.getName());
                if (ps == null) {
                    ps = DEFAULT_PRINT_STYLE;
                    System.out.println("Using default style!");
                }
                // NOTE: formatter settings changed here stay in effect for the
                // remaining properties of this file; bf2 is created per file.
                if (ps.noLabel) {
                    bf2.setLabelSource(null);
                }

                if (ps.valueList) {
                    // One listing over all code points, values shown with
                    // underscores replaced by spaces.
                    bf2.setValueSource(new UnicodeProperty.FilteredProperty(prop, new ReplaceFilter()))
                        .setNameSource(null)
                        .setShowCount(false)
                        .showSetNames(pw, new UnicodeSet(0, 0x10FFFF));
                } else if (prop.getType() <= prop.EXTENDED_BINARY) {
                    // Binary-style property: list only the "True" set, using
                    // the property name itself as the printed value.
                    UnicodeSet s = prop.getSet("True");
                    bf2.setValueSource(prop.getName());
                    bf2.showSetNames(pw, s);
                } else {
                    bf2.setValueSource(prop);
                    Collection aliases = prop.getAvailableValueAliases();
                    if (ps.orderByRangeStart) {
                        System.out.println("Reordering");
                        TreeSet temp2 = new TreeSet(new RangeStartComparator(prop));
                        temp2.addAll(aliases);
                        aliases = temp2;
                    }
                    for (Iterator it = aliases.iterator(); it.hasNext();) {
                        String value = (String) it.next();
                        UnicodeSet s = prop.getSet(value);

                        if (DEBUG) {
                            System.out.println(value + "\t" + prop.getShortestValueAlias(value) + "\t" + ps.skipValue);
                            System.out.println(s.toPattern(true));
                        }

                        if (matchesSkeleton(value, ps.skipValue)) {
                            continue;
                        }
                        if (matchesSkeleton(value, ps.skipUnassigned)) {
                            s.removeAll(unassigned);
                        }

                        if (s.size() == 0) {
                            continue;
                        }
                        //if (unassigned.containsAll(s)) continue; // skip if all unassigned
                        //if (s.contains(0xD0000)) continue; // skip unassigned
                        pw.print(separator);
                        bf2.setValueSource(formatValueName(ps, prop, value));
                        bf2.showSetNames(pw, s);
                    }
                }
            }
        } finally {
            // Always release the output file, including when formatting fails.
            udf.close();
        }
    }

    /**
     * Returns true when {@code target} is set and equals {@code value} under
     * {@link #skeletonComparator}. An unset ({@code null}) target never
     * matches and is never passed to the comparator.
     */
    private static boolean matchesSkeleton(String value, String target) {
        return target != null && skeletonComparator.compare(value, target) == 0;
    }

    /**
     * Applies the naming options of {@code ps} to {@code value}: shortest alias
     * unless {@code longForm}, then optional upper-casing, then optional
     * lower-casing of the first letter.
     */
    private static String formatValueName(PrintStyle ps, UnicodeProperty prop, String value) {
        String result = value;
        if (!ps.longForm) {
            result = prop.getShortestValueAlias(result);
        }
        if (ps.makeUppercase) {
            result = result.toUpperCase(Locale.ENGLISH);
        }
        if (ps.makeFirstLetterLowercase) {
            // NOTE: this is ok since we are only working in ASCII
            result = result.substring(0, 1).toLowerCase(Locale.ENGLISH)
                + result.substring(1);
        }
        return result;
    }

    /**
     * Orders property value names by comparing the code point sets that the
     * property assigns to them, using {@code CompareProperties.UnicodeSetComparator}.
     */
    static class RangeStartComparator implements Comparator {
        UnicodeProperty prop;
        CompareProperties.UnicodeSetComparator comp = new CompareProperties.UnicodeSetComparator();

        RangeStartComparator(UnicodeProperty prop) {
            this.prop = prop;
        }

        /**
         * Compares two value names ({@code String}s) by their sets.
         */
        public int compare(Object o1, Object o2) {
            UnicodeSet s1 = prop.getSet((String) o1);
            UnicodeSet s2 = prop.getSet((String) o2);
            // Compare once and reuse the result for both the trace and the return value.
            int result = comp.compare(s1, s2);
            if (DEBUG) {
                System.out.println("comparing " + o1 + ", " + o2
                    + s1.toPattern(true) + "?" + s2.toPattern(true)
                    + ", " + result);
            }
            return result;
        }

    }

    /** String filter that replaces every underscore in a value with a space. */
    public static class ReplaceFilter extends UnicodeProperty.StringFilter {
        public String remap(String original) {
            return original.replace('_', ' ');
        }
    }


}
various changes for new properties X-SVN-Rev: 14494 2004-02-12 08:23:19 +00:00			`package com.ibm.text.UCD;`

			`import java.io.IOException;`
			`import java.io.PrintWriter;`
			`import java.util.ArrayList;`
			`import java.util.Collection;`
			`import java.util.Comparator;`
			`import java.util.HashMap;`
			`import java.util.Iterator;`
			`import java.util.List;`
			`import java.util.Locale;`
			`import java.util.Map;`
			`import java.util.TreeMap;`
			`import java.util.TreeSet;`

			`import com.ibm.icu.dev.test.util.BagFormatter;`
			`import com.ibm.icu.dev.test.util.UnicodeProperty;`
			`import com.ibm.icu.text.UnicodeSet;`
			`import com.ibm.text.utility.UnicodeDataFile;`

			`public class MakeUnicodeFiles {`

			`static boolean DEBUG = true;`

			`public static void main() throws IOException {`
			`generateFile("Scripts","z");`
			`}`

			`static class OrderedMap {`
			`HashMap map = new HashMap();`
			`ArrayList keys = new ArrayList();`
			`void put(Object o, Object t) {`
			`map.put(o,t);`
			`keys.add(o);`
			`}`
			`List keyset() {`
			`return keys;`
			`}`
			`}`

			`static class PrintStyle {`
			`boolean longForm = false;`
			`boolean noLabel = false;`
			`boolean makeUppercase = false;`
			`boolean makeFirstLetterLowercase = false;`
			`String skipValue = null;`
			`String skipUnassigned = null;`
			`boolean orderByRangeStart = false;`
			`boolean valueList = false;`

			`PrintStyle setLongForm(boolean value) {`
			`longForm = value;`
			`return this;`
			`}`
			`PrintStyle setSkipUnassigned(String value) {`
			`skipUnassigned = value;`
			`return this;`
			`}`
			`PrintStyle setNoLabel(boolean value) {`
			`noLabel = value;`
			`return this;`
			`}`
			`PrintStyle setMakeUppercase(boolean value) {`
			`makeUppercase = value;`
			`return this;`
			`}`
			`PrintStyle setMakeFirstLetterLowercase(boolean value) {`
			`makeFirstLetterLowercase = value;`
			`return this;`
			`}`
			`PrintStyle setSkipValue(String value) {`
			`skipValue = value;`
			`return this;`
			`}`
			`PrintStyle setOrderByRangeStart(boolean value) {`
			`orderByRangeStart = value;`
			`return this;`
			`}`
			`PrintStyle setValueList(boolean value) {`
			`valueList = value;`
			`return this;`
			`}`
			`}`
			`static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle();`
			`static Comparator skeletonComparator = new UnicodeProperty.SkeletonComparator();`
			`static Map printStyles = new TreeMap(/*skeletonComparator*/);`
			`static {`
			`printStyles.put("Script", new PrintStyle().setLongForm(true)`
			`.setMakeUppercase(true).setSkipUnassigned("Common"));`
			`printStyles.put("Age", new PrintStyle().setNoLabel(true));`
			`printStyles.put("Numeric_Type", new PrintStyle().setLongForm(true)`
			`.setMakeFirstLetterLowercase(true).setSkipUnassigned("none"));`
			`printStyles.put("General_Category", new PrintStyle().setNoLabel(true)`
			`//.setSkipUnassigned(true)`
			`);`
			`printStyles.put("Line_Break", new PrintStyle().setSkipUnassigned("Unknown"));`
			`printStyles.put("Joining_Type", new PrintStyle().setSkipValue("Non_Joining"));`
			`printStyles.put("Joining_Group", new PrintStyle().setSkipValue("No_Joining_Group")`
			`.setMakeUppercase(true));`
			`printStyles.put("East_Asian_Width", new PrintStyle().setSkipUnassigned("Neutral"));`
			`printStyles.put("Decomposition_Type", new PrintStyle().setLongForm(true)`
			`.setSkipValue("None").setMakeFirstLetterLowercase(true));`
			`printStyles.put("Bidi_Class", new PrintStyle().setSkipUnassigned("Left_To_Right"));`
			`printStyles.put("Block", new PrintStyle().setNoLabel(true)`
			`.setValueList(true));`
			`printStyles.put("Age", new PrintStyle().setSkipValue("unassigned"));`
			`printStyles.put("Canonical_Combining_Class", new PrintStyle().setSkipValue("0"));`
			`printStyles.put("Hangul_Syllable_Type", new PrintStyle().setSkipValue("NA"));`

			`}`
			`//PropertyAliases`
			`//PropertyValueAliases`
			`//CompositionExclusions`
			`//SpecialCasing`
			`//NormalizationTest`
			`//add("CaseFolding", new String[] {"CaseFolding"});`
			`static Map contents = new TreeMap();`
			`static void add(String name, String[] properties) {`
			`contents.put(name, properties);`
			`}`
			`static {`
			`add("Blocks", new String[] {"Block"});`
			`add("DerivedAge", new String[] {"Age"});`
			`add("Scripts", new String[] {"Script"});`
			`add("HangulSyllableType", new String[] {"HangulSyllableType"});`
			`if (false) add("DerivedNormalizationProps", new String[] {`
			`"FNC", "Full_Composition_Exclusion",`
			`"NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck",`
			`"Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"});`

			`add("DerivedBidiClass", new String[] {"BidiClass"});`
			`add("DerivedBinaryProperties", new String[] {"BidiMirrored"});`
			`add("DerivedCombiningClass", new String[] {"CanonicalCombiningClass"});`
			`add("DerivedDecompositionType", new String[] {"DecompositionType"});`
			`add("DerivedEastAsianWidth", new String[] {"EastAsianWidth"});`
			`add("DerivedGeneralCategory", new String[] {"GeneralCategory"});`
			`add("DerivedJoiningGroup", new String[] {"JoiningGroup"});`
			`add("DerivedJoiningType", new String[] {"JoiningType"});`
			`add("DerivedLineBreak", new String[] {"LineBreak"});`
			`add("DerivedNumericType", new String[] {"NumericType"});`
			`add("DerivedNumericValues", new String[] {"NumericValue"});`
			`add("PropList", new String[] {`
			`"White_Space", "Bidi_Control", "Join_Control",`
			`"Dash", "Hyphen", "Quotation_Mark",`
			`"Terminal_Punctuation", "Other_Math",`
			`"Hex_Digit", "ASCII_Hex_Digit",`
			`"Other_Alphabetic",`
			`"Ideographic",`
			`"Diacritic", "Extender",`
			`"Other_Lowercase", "Other_Uppercase",`
			`"Noncharacter_Code_Point",`
			`"Other_Grapheme_Extend",`
			`"Grapheme_Link",`
			`"IDS_Binary_Operator", "IDS_Trinary_Operator",`
			`"Radical", "Unified_Ideograph",`
			`"Other_Default_Ignorable_Code_Point",`
			`"Deprecated", "Soft_Dotted",`
			`"Logical_Order_Exception",`
			`"Other_ID_Start"`
			`});`
			`add("DerivedCoreProperties", new String[] {`
			`"Math", "Alphabetic", "Lowercase", "Uppercase",`
			`"ID_Start", "ID_Continue",`
			`"XID_Start", "XID_Continue",`
			`"Default_Ignorable_Code_Point",`
			`"Grapheme_Extend", "Grapheme_Base"`
			`});`
			`}`

			`public static void generateFile(String atOrAfter, String atOrBefore) throws IOException {`
			`Iterator it = contents.keySet().iterator();`
			`while (it.hasNext()) {`
			`String propname = (String) it.next();`
			`if (propname.compareTo(atOrAfter) < 0) continue;`
			`if (propname.compareTo(atOrBefore) > 0) continue;`
			`generateFile(propname);`
			`}`
			`}`

			`public static void generateFile(String filename) throws IOException {`
			`String[] propList = (String[]) contents.get(filename);`
			`UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedDataTest/", filename);`
			`PrintWriter pw = udf.out; // bf2.openUTF8Writer(UCD_Types.GEN_DIR, "Test" + filename + ".txt");`
			`UnicodeProperty.Factory toolFactory`
			`= ToolUnicodePropertySource.make(Default.ucdVersion());`
			`BagFormatter bf2 = new BagFormatter(toolFactory);`
			`UnicodeSet unassigned = toolFactory.getSet("gc=cn")`
			`.addAll(toolFactory.getSet("gc=cs"));`
			`//System.out.println(unassigned.toPattern(true));`
			`// .removeAll(toolFactory.getSet("noncharactercodepoint=true"));`
			`String separator = bf2.getLineSeparator()`
			`+ "# ================================================"`
			`+ bf2.getLineSeparator() + bf2.getLineSeparator();`

			`for (int i = 0; i < propList.length; ++i) {`
			`UnicodeProperty prop = toolFactory.getProperty(propList[i]);`
			`System.out.println(prop.getName());`
			`pw.print(separator);`
			`PrintStyle ps = (PrintStyle) printStyles.get(prop.getName());`
			`if (ps == null) {`
			`ps = DEFAULT_PRINT_STYLE;`
			`System.out.println("Using default style!");`
			`}`
			`if (ps.noLabel) bf2.setLabelSource(null);`

			`if (ps.valueList) {`
			`bf2.setValueSource(new UnicodeProperty.FilteredProperty(prop, new ReplaceFilter()))`
			`.setNameSource(null)`
			`.setShowCount(false)`
			`.showSetNames(pw,new UnicodeSet(0,0x10FFFF));`
			`} else if (prop.getType() <= prop.EXTENDED_BINARY) {`
			`UnicodeSet s = prop.getSet("True");`
			`bf2.setValueSource(prop.getName());`
			`bf2.showSetNames(pw, s);`
			`} else {`
			`bf2.setValueSource(prop);`
			`Collection aliases = prop.getAvailableValueAliases();`
			`if (ps.orderByRangeStart) {`
			`System.out.println("Reordering");`
			`TreeSet temp2 = new TreeSet(new RangeStartComparator(prop));`
			`temp2.addAll(aliases);`
			`aliases = temp2;`
			`}`
			`Iterator it = aliases.iterator();`
			`while (it.hasNext()) {`
			`String value = (String)it.next();`
			`UnicodeSet s = prop.getSet(value);`

			`System.out.println(value + "\t" + prop.getShortestValueAlias(value) + "\t" + ps.skipValue);`
			`System.out.println(s.toPattern(true));`

			`if (skeletonComparator.compare(value, ps.skipValue) == 0) continue;`
			`if (skeletonComparator.compare(value, ps.skipUnassigned) == 0) {`
			`s.removeAll(unassigned);`
			`}`

			`if (s.size() == 0) continue;`
			`//if (unassigned.containsAll(s)) continue; // skip if all unassigned`
			`//if (s.contains(0xD0000)) continue; // skip unassigned`
			`pw.print(separator);`
			`if (!ps.longForm) value = prop.getShortestValueAlias(value);`
			`if (ps.makeUppercase) value = value.toUpperCase(Locale.ENGLISH);`
			`if (ps.makeFirstLetterLowercase) {`
			`// NOTE: this is ok since we are only working in ASCII`
			`value = value.substring(0,1).toLowerCase(Locale.ENGLISH)`
			`+ value.substring(1);`
			`}`
			`bf2.setValueSource(value);`
			`bf2.showSetNames(pw, s);`
			`}`
			`}`
			`}`
			`udf.close();`
			`}`
			`static class RangeStartComparator implements Comparator {`
			`UnicodeProperty prop;`
			`CompareProperties.UnicodeSetComparator comp = new CompareProperties.UnicodeSetComparator();`
			`RangeStartComparator(UnicodeProperty prop) {`
			`this.prop = prop;`
			`}`
			`public int compare(Object o1, Object o2) {`
			`UnicodeSet s1 = prop.getSet((String)o1);`
			`UnicodeSet s2 = prop.getSet((String)o2);`
			`if (true) System.out.println("comparing " + o1 + ", " + o2`
			`+ s1.toPattern(true) + "?" + s2.toPattern(true)`
			`+ ", " + comp.compare(s1, s2));`
			`return comp.compare(s1, s2);`
			`}`

			`}`

			`public static class ReplaceFilter extends UnicodeProperty.StringFilter {`
			`public String remap(String original) {`
			`return original.replace('_',' ');`
			`}`
			`}`



			`}`