package com.ibm.text.UCD; import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; import java.io.UnsupportedEncodingException; import java.lang.reflect.Field; import java.text.ParseException; import java.text.ParsePosition; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.ResourceBundle; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.ibm.icu.dev.test.util.BagFormatter; import com.ibm.icu.dev.test.util.Tabber; import com.ibm.icu.dev.test.util.UnicodeLabel; import com.ibm.icu.dev.test.util.UnicodeProperty; import com.ibm.icu.text.NumberFormat; import com.ibm.icu.text.SymbolTable; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeMatcher; import com.ibm.icu.text.UnicodeSet; import com.ibm.text.UCD.GenerateBreakTest.GenerateGraphemeBreakTest; import com.ibm.text.UCD.GenerateBreakTest.GenerateLineBreakTest; import com.ibm.text.UCD.GenerateBreakTest.GenerateSentenceBreakTest; import com.ibm.text.UCD.GenerateBreakTest.GenerateWordBreakTest; import com.ibm.text.UCD.MakeUnicodeFiles.Format.PrintStyle; import com.ibm.text.utility.UnicodeDataFile; import com.ibm.text.utility.Utility; import com.ibm.icu.text.Collator; public class MakeUnicodeFiles { public static int dVersion = -1; // change to fix the generated file D version. If less than zero, no "d" /*static String[] hackNameList = { "noBreak", "Arabic_Presentation_Forms-A", "Arabic_Presentation_Forms-B", "CJK_Symbols_and_Punctuation", "Combining_Diacritical_Marks_for_Symbols", "Enclosed_CJK_Letters_and_Months", "Greek_and_Coptic", "Halfwidth_and_Fullwidth_Forms", "Latin-1_Supplement", "Latin_Extended-A", "Latin_Extended-B", "Miscellaneous_Mathematical_Symbols-A", "Miscellaneous_Mathematical_Symbols-B", "Miscellaneous_Symbols_and_Arrows", "Superscripts_and_Subscripts", "Supplemental_Arrows-A", "Supplemental_Arrows-B", "Supplementary_Private_Use_Area-A", "Supplementary_Private_Use_Area-B", "Canadian-Aboriginal", "Old-Italic" }; static { for (int i = 0; i < hackNameList.length; ++i) { System.out.println("HackName:\t" + hackNameList[i]); } } */ static boolean DEBUG = false; public static void main(String[] args) throws IOException { generateFile(); } static class Format { public static Format theFormat = new Format(); // singleton Map printStyleMap = new TreeMap(UnicodeProperty.PROPERTY_COMPARATOR); static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle(); Map fileToPropertySet = new TreeMap(); Map fileToComments = new TreeMap(); Map fileToDirectory = new TreeMap(); TreeMap propertyToValueToComments = new TreeMap(); Map hackMap = new HashMap(); UnicodeProperty.MapFilter hackMapFilter; String[] filesToDo; private Format(){ build(); } /* static String[] FILE_OPTIONS = { "Script nameStyle=none makeUppercase skipUnassigned=Common hackValues", "Age nameStyle=none noLabel skipValue=unassigned", "Numeric_Type nameStyle=none makeFirstLetterLowercase skipValue=None", "General_Category nameStyle=none valueStyle=short noLabel", "Line_Break nameStyle=none valueStyle=short skipUnassigned=Unknown", "Joining_Type nameStyle=none valueStyle=short skipValue=Non_Joining", "Joining_Group nameStyle=none skipValue=No_Joining_Group makeUppercase", "East_Asian_Width nameStyle=none valueStyle=short skipUnassigned=Neutral", "Decomposition_Type nameStyle=none skipValue=None makeFirstLetterLowercase hackValues", "Bidi_Class nameStyle=none valueStyle=short skipUnassigned=Left_To_Right", "Block nameStyle=none noLabel valueList", "Canonical_Combining_Class nameStyle=none valueStyle=short skipUnassigned=Not_Reordered longValueHeading=ccc", "Hangul_Syllable_Type nameStyle=none valueStyle=short skipValue=Not_Applicable", "NFD_Quick_Check nameStyle=short valueStyle=short skipValue=Yes", "NFC_Quick_Check nameStyle=short valueStyle=short skipValue=Yes", "NFKC_Quick_Check nameStyle=short valueStyle=short skipValue=Yes", "NFKD_Quick_Check nameStyle=short valueStyle=short skipValue=Yes", "FC_NFKC_Closure nameStyle=short" }; */ void printFileComments(PrintWriter pw, String filename) { String fileComments = (String) fileToComments.get(filename); if (fileComments != null) pw.println(fileComments); } private void addPrintStyle(String options) { PrintStyle result = new PrintStyle(); printStyleMap.put(result.parse(options), result); } public PrintStyle getPrintStyle(String propname) { PrintStyle result = (PrintStyle) printStyleMap.get(propname); if (result != null) return result; if (DEBUG) System.out.println("Using default style!"); return DEFAULT_PRINT_STYLE; } public static class PrintStyle { boolean noLabel = false; boolean makeUppercase = false; boolean makeFirstLetterLowercase = false; boolean orderByRangeStart = false; boolean interleaveValues = false; boolean hackValues = false; String nameStyle = "none"; String valueStyle = "long"; String skipValue = null; String skipUnassigned = null; String longValueHeading = null; boolean sortNumeric = false; String parse(String options) { options = options.replace('\t', ' '); String[] pieces = Utility.split(options, ' '); for (int i = 1; i < pieces.length; ++i) { String piece = pieces[i]; // binary if (piece.equals("noLabel")) noLabel = true; else if (piece.equals("makeUppercase")) makeUppercase = true; else if (piece.equals("makeFirstLetterLowercase")) makeFirstLetterLowercase = true; else if (piece.equals("orderByRangeStart")) orderByRangeStart = true; else if (piece.equals("valueList")) interleaveValues = true; else if (piece.equals("hackValues")) hackValues = true; else if (piece.equals("sortNumeric")) sortNumeric = true; // with parameter else if (piece.startsWith("valueStyle=")) valueStyle = afterEquals(piece); else if (piece.startsWith("nameStyle=")) nameStyle = afterEquals(piece); else if (piece.startsWith("longValueHeading=")) longValueHeading = afterEquals(piece); else if (piece.startsWith("skipValue=")) { if (skipUnassigned != null) throw new IllegalArgumentException("Can't have both skipUnassigned and skipValue"); skipValue = afterEquals(piece); } else if (piece.startsWith("skipUnassigned=")) { if (skipValue != null) throw new IllegalArgumentException("Can't have both skipUnassigned and skipValue"); skipUnassigned = afterEquals(piece); } else if (piece.length() != 0) { throw new IllegalArgumentException( "Illegal PrintStyle Parameter: " + piece + " in " + pieces[0]); } } return pieces[0]; } public String toString() { Class myClass = getClass(); String result = myClass.getName() + "\n"; Field[] myFields = myClass.getDeclaredFields(); for (int i = 0; i < myFields.length; ++i) { String value = ""; try { Object obj = myFields[i].get(this); if (obj == null) value = ""; else value = obj.toString(); } catch (Exception e) {} result += "\t" + myFields[i].getName() + "=<" + value + ">\n"; } return result; } } /* static { for (int i = 0; i < FILE_OPTIONS.length; ++i) { PrintStyle.add(FILE_OPTIONS[i]); } } */ void addValueComments(String property, String value, String comments) { if (DEBUG) showPVC(property, value, comments); TreeMap valueToComments = (TreeMap) propertyToValueToComments.get(property); if (valueToComments == null) { valueToComments = new TreeMap(); propertyToValueToComments.put(property, valueToComments); } valueToComments.put(value, comments); if (DEBUG && property.equals("BidiClass")) { getValueComments(property, value); } } private void showPVC(String property, String value, String comments) { System.out.println( "Putting Property: <" + property + ">, Value: <" + value + ">, Comments: <" + comments + ">"); } String getValueComments(String property, String value) { TreeMap valueToComments = (TreeMap) propertyToValueToComments.get(property); String result = null; if (valueToComments != null) result = (String) valueToComments.get(value); if (DEBUG) System.out.println("Getting Property: <" + property + ">, Value: <" + value + ">, Comment: <" + result + ">"); return result; } Map getValue2CommentsMap(String property) { return (Map) propertyToValueToComments.get(property); } static String afterEquals(String source) { return source.substring(source.indexOf('=') + 1); } static String afterWhitespace(String source) { // Note: don't need to be international for (int i = 0; i < source.length(); ++i) { char ch = source.charAt(i); if (Character.isWhitespace(source.charAt(i))) { return source.substring(i).trim(); } } return ""; } /*private void add(String name, String[] properties) { fileToPropertySet.put(name, properties); }*/ private void build() { /* for (int i = 0; i < hackNameList.length; ++i) { String item = hackNameList[i]; String regularItem = UnicodeProperty.regularize(item,true); hackMap.put(regularItem, item); } */ /* for (int i = 0; i < UCD_Names.UNIFIED_PROPERTIES.length; ++i) { String name = Utility.getUnskeleton(UCD_Names.UNIFIED_PROPERTIES[i], false); valueComments.add(name, "*", "# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[i]); System.out.println(); System.out.println(name); System.out.println("# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[i]); } // HACK valueComments.add("Bidi_Mirroring", "*", "# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[9]); */ try { BufferedReader br = Utility.openReadFile("MakeUnicodeFiles.txt", Utility.UTF8); String key = null; String file = null, property = null, value = "", comments = ""; while (true) { String line = br.readLine(); if (line == null) break; line = line.trim(); if (line.length() == 0) continue; if (DEBUG) System.out.println("\t" + line); String lineValue = afterWhitespace(line); if (line.startsWith("Format:")) { addPrintStyle(property + " " + lineValue); // fix later } else if (line.startsWith("#")) { if (comments.length() != 0) comments += "\n"; comments += line; } else { // end of comments, roll up if (property != null) addValueComments(property, value, comments); comments = ""; if (line.startsWith("Generate:")) { filesToDo = Utility.split(lineValue.trim(), ' '); if (filesToDo.length == 0 || (filesToDo.length == 1 && filesToDo[0].length() == 0)) { filesToDo = new String[] {".*"}; } } else if (line.startsWith("DeltaVersion:")) { dVersion = Integer.parseInt(lineValue); } else if (line.startsWith("CopyrightYear:")) { Default.setYear(lineValue); } else if (line.startsWith("File:")) { int p2 = lineValue.lastIndexOf('/'); file = lineValue.substring(p2+1); if (p2 >= 0) { fileToDirectory.put(file, lineValue.substring(0,p2+1)); } property = null; } else if (line.startsWith("Property:")) { property = lineValue; addPropertyToFile(file, property); value = ""; } else if (line.startsWith("Value:")) { value = lineValue; } else if (line.startsWith("HackName:")) { String regularItem = UnicodeProperty.regularize(lineValue, true); hackMap.put(regularItem, lineValue); } else if (line.startsWith("FinalComments")) { break; } else { throw new IllegalArgumentException("Unknown command: " + line); } } } br.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); throw new IllegalArgumentException("File missing"); } hackMapFilter = new UnicodeProperty.MapFilter(hackMap); /* add("PropertyValueAliases", null); add("PropertyAliases", null); add("SpecialCasing", null); add("NormalizationTest", null); add("StandardizedVariants", null); add("CaseFolding", null); add("DerivedAge", new String[] {"Age"}); add("Scripts", new String[] {"Script"}); add("HangulSyllableType", new String[] {"HangulSyllableType"}); add("DerivedBidiClass", new String[] {"BidiClass"}); add("DerivedBinaryProperties", new String[] {"BidiMirrored"}); add("DerivedCombiningClass", new String[] {"CanonicalCombiningClass"}); add("DerivedDecompositionType", new String[] {"DecompositionType"}); add("DerivedEastAsianWidth", new String[] {"EastAsianWidth"}); add("DerivedGeneralCategory", new String[] {"GeneralCategory"}); add("DerivedJoiningGroup", new String[] {"JoiningGroup"}); add("DerivedJoiningType", new String[] {"JoiningType"}); add("DerivedLineBreak", new String[] {"LineBreak"}); add("DerivedNumericType", new String[] {"NumericType"}); add("DerivedNumericValues", new String[] {"NumericValue"}); add("PropList", new String[] { "White_Space", "Bidi_Control", "Join_Control", "Dash", "Hyphen", "Quotation_Mark", "Terminal_Punctuation", "Other_Math", "Hex_Digit", "ASCII_Hex_Digit", "Other_Alphabetic", "Ideographic", "Diacritic", "Extender", "Other_Lowercase", "Other_Uppercase", "Noncharacter_Code_Point", "Other_Grapheme_Extend", "Grapheme_Link", "IDS_Binary_Operator", "IDS_Trinary_Operator", "Radical", "Unified_Ideograph", "Other_Default_Ignorable_Code_Point", "Deprecated", "Soft_Dotted", "Logical_Order_Exception", "Other_ID_Start" }); add("DerivedCoreProperties", new String[] { "Math", "Alphabetic", "Lowercase", "Uppercase", "ID_Start", "ID_Continue", "XID_Start", "XID_Continue", "Default_Ignorable_Code_Point", "Grapheme_Extend", "Grapheme_Base" }); add("DerivedNormalizationProps", new String[] { "FC_NFKC_Closure", "Full_Composition_Exclusion", "NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck", "Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC" }); */ write(); } private void write() { TreeMap fileoptions = new TreeMap(); /*for (int i = 0; i < FILE_OPTIONS.length; ++i) { String s = FILE_OPTIONS[i]; int pos = s.indexOf(' '); String name = s.substring(0,pos); String options = s.substring(pos).trim(); fileoptions.put(name, options); } */ for (Iterator it = fileToPropertySet.keySet().iterator(); it.hasNext();) { String key = (String) it.next(); if (DEBUG) { System.out.println(); System.out.println("File:\t" + key); } List propList2 = (List) fileToPropertySet.get(key); if (propList2 == null) { System.out.println("SPECIAL"); continue; } for (Iterator pIt = propList2.iterator(); pIt.hasNext();) { String prop = (String) pIt.next(); String options = (String)fileoptions.get(prop); if (DEBUG) { System.out.println(); System.out.println("Property:\t" + prop); if (options != null) { System.out.println("Format:\t" + options); } } Map vc = getValue2CommentsMap(prop); if (vc == null) continue; for (Iterator it2 = vc.keySet().iterator(); it2.hasNext();) { String value = (String) it2.next(); String comment = (String) vc.get(value); if (DEBUG) { if (!value.equals("")) { System.out.println("Value:\t" + value); } System.out.println(comment); } } } } } private void addCommentToFile(String filename, String comment) { fileToComments.put(filename, comment); } private void addPropertyToFile(String filename, String property) { List properties = (List) fileToPropertySet.get(filename); if (properties == null) { properties = new ArrayList(1); fileToPropertySet.put(filename, properties); } properties.add(property); } public List getPropertiesFromFile(String filename) { return (List) fileToPropertySet.get(filename); } public Set getFiles() { return fileToPropertySet.keySet(); } } public static void generateFile() throws IOException { for (int i = 0; i < Format.theFormat.filesToDo.length; ++i) { String fileNamePattern = Format.theFormat.filesToDo[i].trim(); // .toLowerCase(Locale.ENGLISH); Matcher matcher = Pattern.compile(fileNamePattern, Pattern.CASE_INSENSITIVE).matcher(""); Iterator it = Format.theFormat.getFiles().iterator(); boolean gotOne = false; while (it.hasNext()) { String propname = (String) it.next(); if (!matcher.reset(propname).matches()) continue; //if (!propname.toLowerCase(Locale.ENGLISH).startsWith(fileName)) continue; generateFile(propname); gotOne = true; } if (!gotOne) { throw new IllegalArgumentException( "Non-matching file name: " + fileNamePattern); } } } public static void generateFile(String filename) throws IOException { if (filename.endsWith("Aliases")) { if (filename.endsWith("ValueAliases")) generateValueAliasFile(filename); else generateAliasFile(filename); } else if (filename.equals("NormalizationTest")) { GenerateData.writeNormalizerTestSuite("DerivedData/", "NormalizationTest"); } else if (filename.equals("CaseFolding")) { GenerateCaseFolding.makeCaseFold(false); } else if (filename.equals("SpecialCasing")) { GenerateCaseFolding.generateSpecialCasing(false); } else if (filename.equals("StandardizedVariants")) { GenerateStandardizedVariants.generate(); } else if (filename.equals("NamedSequences")) { GenerateNamedSequences.generate(); } else if (filename.equals("GraphemeBreakTest")) { new GenerateGraphemeBreakTest(Default.ucd()).run(); } else if (filename.equals("WordBreakTest")) { new GenerateWordBreakTest(Default.ucd()).run(); } else if (filename.equals("LineBreakTest")) { new GenerateLineBreakTest(Default.ucd()).run(); } else if (filename.equals("SentenceBreakTest")) { new GenerateSentenceBreakTest(Default.ucd()).run(); } else { generatePropertyFile(filename); } } static final String SEPARATOR = "# ================================================"; public static void generateAliasFile(String filename) throws IOException { UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedData/", filename); PrintWriter pw = udf.out; UnicodeProperty.Factory ups = ToolUnicodePropertySource.make(Default.ucdVersion()); TreeSet sortedSet = new TreeSet(CASELESS_COMPARATOR); BagFormatter bf = new BagFormatter(); Tabber.MonoTabber mt = new Tabber.MonoTabber() .add(10,Tabber.LEFT) .add(30,Tabber.LEFT); int count = 0; for (int i = UnicodeProperty.LIMIT_TYPE - 1; i >= UnicodeProperty.BINARY; --i) { if ((i & UnicodeProperty.EXTENDED_MASK) != 0) continue; List list = ups.getAvailableNames(1< 0) { nameStr = nameStr.substring(0,pos) + "%" + nameStr.substring(pos + code.length()); } nameStr += ";" + category.getValue(codepoint, true) + ";" + combiningClass.getValue(codepoint, true) + ";" + bidiClass.getValue(codepoint, true) + ";" ; String temp = decompositionType.getValue(codepoint, true); if (!temp.equals("None")) { nameStr += "<" + temp + "> " + Utility.hex(decompositionValue.getValue(codepoint)); } nameStr += ";"; temp = numericType.getValue(codepoint, true); if (temp.equals("Decimal")) { nameStr += temp + ";" + temp + ";" + temp + ";"; } else if (temp.equals("Digit")) { nameStr += ";" + temp + ";" + temp + ";"; } else if (temp.equals("Numeric")) { nameStr += ";;" + temp + ";"; } else if (temp.equals("Digit")) { nameStr += ";;;"; } if (bidiMirrored.getValue(codepoint, true).equals("True")) { nameStr += "Y" + ";"; } nameStr += ";"; return nameStr; } } /* static class PropertySymbolTable implements SymbolTable { static boolean DEBUG = false; UnicodeProperty.Factory factory; //static Matcher identifier = Pattern.compile("([:letter:] [\\_\\-[:letter:][:number:]]*)").matcher(""); PropertySymbolTable (UnicodeProperty.Factory factory) { this.factory = factory; } public char[] lookup(String s) { if (DEBUG) System.out.println("\tLooking up " + s); int pos = s.indexOf('='); if (pos < 0) return null; // should never happen UnicodeProperty prop = factory.getProperty(s.substring(0,pos)); if (prop == null) { throw new IllegalArgumentException("Invalid Property: " + s + "\r\nUse " + showSet(factory.getAvailableNames())); } String value = s.substring(pos+1); UnicodeSet set = prop.getSet(value); if (set.size() == 0) { throw new IllegalArgumentException("Empty Property-Value: " + s + "\r\nUse " + showSet(prop.getAvailableValues())); } if (DEBUG) System.out.println("\tReturning " + set.toPattern(true)); return set.toPattern(true).toCharArray(); // really ugly } private String showSet(List list) { StringBuffer result = new StringBuffer("["); boolean first = true; for (Iterator it = list.iterator(); it.hasNext();) { if (!first) result.append(", "); else first = false; result.append(it.next().toString()); } result.append("]"); return result.toString(); } public UnicodeMatcher lookupMatcher(int ch) { return null; } public String parseReference(String text, ParsePosition pos, int limit) { if (DEBUG) System.out.println("\tParsing <" + text.substring(pos.getIndex(),limit) + ">"); int start = pos.getIndex(); int i = getIdentifier(text, start, limit); if (i == start) return null; String prop = text.substring(start, i); String value = "true"; if (i < limit) { int cp = text.charAt(i); if (cp == ':' || cp == '=') { int j = getIdentifier(text, i+1, limit); value = text.substring(i+1, j); i = j; } } pos.setIndex(i); if (DEBUG) System.out.println("\tParsed <" + prop + ">=<" + value + ">"); return prop + '=' + value; } private int getIdentifier(String text, int start, int limit) { if (DEBUG) System.out.println("\tGetID <" + text.substring(start,limit) + ">"); int cp = 0; int i; for (i = start; i < limit; i += UTF16.getCharCount(cp)) { cp = UTF16.charAt(text, i); if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) { break; } } if (DEBUG) System.out.println("\tGotID <" + text.substring(start,i) + ">"); return i; } }; /* getCombo(UnicodeProperty.Factory factory, String line) { UnicodeSet result = new UnicodeSet(); String[] pieces = Utility.split(line, '+'); for (int i = 0; i < pieces.length; ++i) { String[] parts = Utility.split(pieces[i],':'); String prop = parts[0].trim(); String value = "true"; if (parts.length > 1) value = parts[1].trim(); UnicodeProperty p = factory.getProperty(prop); result.addAll(p.getSet(value)); } return result; } */ } /* static class OrderedMap { HashMap map = new HashMap(); ArrayList keys = new ArrayList(); void put(Object o, Object t) { map.put(o,t); keys.add(o); } List keyset() { return keys; } } */