557bade86a
X-SVN-Rev: 19520
1309 lines
56 KiB
Java
1309 lines
56 KiB
Java
package com.ibm.text.UCD;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.FileNotFoundException;
|
|
import java.io.IOException;
|
|
import java.io.PrintWriter;
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.lang.reflect.Field;
|
|
import java.text.ParseException;
|
|
import java.text.ParsePosition;
|
|
import java.util.ArrayList;
|
|
import java.util.Arrays;
|
|
import java.util.Collection;
|
|
import java.util.Comparator;
|
|
import java.util.HashMap;
|
|
import java.util.HashSet;
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
import java.util.Locale;
|
|
import java.util.Map;
|
|
import java.util.ResourceBundle;
|
|
import java.util.Set;
|
|
import java.util.TreeMap;
|
|
import java.util.TreeSet;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
import com.ibm.icu.dev.test.util.BagFormatter;
|
|
import com.ibm.icu.dev.test.util.Tabber;
|
|
import com.ibm.icu.dev.test.util.UnicodeLabel;
|
|
import com.ibm.icu.dev.test.util.UnicodeProperty;
|
|
import com.ibm.icu.text.NumberFormat;
|
|
import com.ibm.icu.text.SymbolTable;
|
|
import com.ibm.icu.text.UTF16;
|
|
import com.ibm.icu.text.UnicodeMatcher;
|
|
import com.ibm.icu.text.UnicodeSet;
|
|
import com.ibm.text.UCD.GenerateBreakTest.GenerateGraphemeBreakTest;
|
|
import com.ibm.text.UCD.GenerateBreakTest.GenerateLineBreakTest;
|
|
import com.ibm.text.UCD.GenerateBreakTest.GenerateSentenceBreakTest;
|
|
import com.ibm.text.UCD.GenerateBreakTest.GenerateWordBreakTest;
|
|
import com.ibm.text.UCD.MakeUnicodeFiles.Format.PrintStyle;
|
|
import com.ibm.text.utility.UnicodeDataFile;
|
|
import com.ibm.text.utility.Utility;
|
|
import com.ibm.icu.text.Collator;
|
|
|
|
public class MakeUnicodeFiles {
|
|
public static int dVersion = -1; // change to fix the generated file D version. If less than zero, no "d"
|
|
|
|
/*static String[] hackNameList = {
|
|
"noBreak", "Arabic_Presentation_Forms-A", "Arabic_Presentation_Forms-B",
|
|
"CJK_Symbols_and_Punctuation", "Combining_Diacritical_Marks_for_Symbols",
|
|
"Enclosed_CJK_Letters_and_Months", "Greek_and_Coptic",
|
|
"Halfwidth_and_Fullwidth_Forms", "Latin-1_Supplement", "Latin_Extended-A",
|
|
"Latin_Extended-B", "Miscellaneous_Mathematical_Symbols-A",
|
|
"Miscellaneous_Mathematical_Symbols-B", "Miscellaneous_Symbols_and_Arrows",
|
|
"Superscripts_and_Subscripts", "Supplemental_Arrows-A", "Supplemental_Arrows-B",
|
|
"Supplementary_Private_Use_Area-A", "Supplementary_Private_Use_Area-B",
|
|
"Canadian-Aboriginal", "Old-Italic"
|
|
};
|
|
static {
|
|
for (int i = 0; i < hackNameList.length; ++i) {
|
|
System.out.println("HackName:\t" + hackNameList[i]);
|
|
}
|
|
}
|
|
*/
|
|
|
|
static boolean DEBUG = false;
|
|
|
|
public static void main(String[] args) throws IOException {
|
|
generateFile();
|
|
System.out.println("DONE");
|
|
}
|
|
|
|
static class Format {
|
|
public static Format theFormat = new Format(); // singleton
|
|
|
|
|
|
Map printStyleMap = new TreeMap(UnicodeProperty.PROPERTY_COMPARATOR);
|
|
static PrintStyle DEFAULT_PRINT_STYLE = new PrintStyle();
|
|
Map fileToPropertySet = new TreeMap();
|
|
Map fileToComments = new TreeMap();
|
|
Map fileToDirectory = new TreeMap();
|
|
TreeMap propertyToValueToComments = new TreeMap();
|
|
Map hackMap = new HashMap();
|
|
UnicodeProperty.MapFilter hackMapFilter;
|
|
String[] filesToDo;
|
|
|
|
private Format(){
|
|
build();
|
|
}
|
|
/*
|
|
static String[] FILE_OPTIONS = {
|
|
"Script nameStyle=none makeUppercase skipUnassigned=Common hackValues",
|
|
"Age nameStyle=none noLabel skipValue=unassigned",
|
|
"Numeric_Type nameStyle=none makeFirstLetterLowercase skipValue=None",
|
|
"General_Category nameStyle=none valueStyle=short noLabel",
|
|
"Line_Break nameStyle=none valueStyle=short skipUnassigned=Unknown",
|
|
"Joining_Type nameStyle=none valueStyle=short skipValue=Non_Joining",
|
|
"Joining_Group nameStyle=none skipValue=No_Joining_Group makeUppercase",
|
|
"East_Asian_Width nameStyle=none valueStyle=short skipUnassigned=Neutral",
|
|
"Decomposition_Type nameStyle=none skipValue=None makeFirstLetterLowercase hackValues",
|
|
"Bidi_Class nameStyle=none valueStyle=short skipUnassigned=Left_To_Right",
|
|
"Block nameStyle=none noLabel valueList",
|
|
"Canonical_Combining_Class nameStyle=none valueStyle=short skipUnassigned=Not_Reordered longValueHeading=ccc",
|
|
"Hangul_Syllable_Type nameStyle=none valueStyle=short skipValue=Not_Applicable",
|
|
"NFD_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
|
|
"NFC_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
|
|
"NFKC_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
|
|
"NFKD_Quick_Check nameStyle=short valueStyle=short skipValue=Yes",
|
|
"FC_NFKC_Closure nameStyle=short"
|
|
};
|
|
*/
|
|
|
|
void printFileComments(PrintWriter pw, String filename) {
|
|
String fileComments = (String) fileToComments.get(filename);
|
|
if (fileComments != null)
|
|
pw.println(fileComments);
|
|
}
|
|
|
|
private void addPrintStyle(String options) {
|
|
PrintStyle result = new PrintStyle();
|
|
printStyleMap.put(result.parse(options), result);
|
|
}
|
|
|
|
public PrintStyle getPrintStyle(String propname) {
|
|
PrintStyle result = (PrintStyle) printStyleMap.get(propname);
|
|
if (result != null)
|
|
return result;
|
|
if (DEBUG)
|
|
System.out.println("Using default style!");
|
|
return DEFAULT_PRINT_STYLE;
|
|
}
|
|
|
|
public static class PrintStyle {
|
|
boolean noLabel = false;
|
|
boolean makeUppercase = false;
|
|
boolean makeFirstLetterLowercase = false;
|
|
boolean orderByRangeStart = false;
|
|
boolean interleaveValues = false;
|
|
boolean hackValues = false;
|
|
String nameStyle = "none";
|
|
String valueStyle = "long";
|
|
String skipValue = null;
|
|
String skipUnassigned = null;
|
|
String longValueHeading = null;
|
|
boolean sortNumeric = false;
|
|
|
|
String parse(String options) {
|
|
options = options.replace('\t', ' ');
|
|
String[] pieces = Utility.split(options, ' ');
|
|
for (int i = 1; i < pieces.length; ++i) {
|
|
String piece = pieces[i];
|
|
// binary
|
|
if (piece.equals("noLabel"))
|
|
noLabel = true;
|
|
else if (piece.equals("makeUppercase"))
|
|
makeUppercase = true;
|
|
else if (piece.equals("makeFirstLetterLowercase"))
|
|
makeFirstLetterLowercase = true;
|
|
else if (piece.equals("orderByRangeStart"))
|
|
orderByRangeStart = true;
|
|
else if (piece.equals("valueList"))
|
|
interleaveValues = true;
|
|
else if (piece.equals("hackValues"))
|
|
hackValues = true;
|
|
else if (piece.equals("sortNumeric"))
|
|
sortNumeric = true;
|
|
// with parameter
|
|
else if (piece.startsWith("valueStyle="))
|
|
valueStyle = afterEquals(piece);
|
|
else if (piece.startsWith("nameStyle="))
|
|
nameStyle = afterEquals(piece);
|
|
else if (piece.startsWith("longValueHeading="))
|
|
longValueHeading = afterEquals(piece);
|
|
else if (piece.startsWith("skipValue=")) {
|
|
if (skipUnassigned != null)
|
|
throw new IllegalArgumentException("Can't have both skipUnassigned and skipValue");
|
|
skipValue = afterEquals(piece);
|
|
} else if (piece.startsWith("skipUnassigned=")) {
|
|
if (skipValue != null)
|
|
throw new IllegalArgumentException("Can't have both skipUnassigned and skipValue");
|
|
skipUnassigned = afterEquals(piece);
|
|
} else if (piece.length() != 0) {
|
|
throw new IllegalArgumentException(
|
|
"Illegal PrintStyle Parameter: "
|
|
+ piece
|
|
+ " in "
|
|
+ pieces[0]);
|
|
}
|
|
}
|
|
return pieces[0];
|
|
}
|
|
public String toString() {
|
|
Class myClass = getClass();
|
|
String result = myClass.getName() + "\n";
|
|
Field[] myFields = myClass.getDeclaredFields();
|
|
for (int i = 0; i < myFields.length; ++i) {
|
|
String value = "<private>";
|
|
try {
|
|
Object obj = myFields[i].get(this);
|
|
if (obj == null) value = "<null>";
|
|
else value = obj.toString();
|
|
} catch (Exception e) {}
|
|
result += "\t" + myFields[i].getName() + "=<" + value + ">\n";
|
|
}
|
|
return result;
|
|
}
|
|
}
|
|
/*
|
|
static {
|
|
for (int i = 0; i < FILE_OPTIONS.length; ++i) {
|
|
PrintStyle.add(FILE_OPTIONS[i]);
|
|
}
|
|
}
|
|
*/
|
|
|
|
void addValueComments(String property, String value, String comments) {
|
|
if (DEBUG)
|
|
showPVC(property, value, comments);
|
|
TreeMap valueToComments =
|
|
(TreeMap) propertyToValueToComments.get(property);
|
|
if (valueToComments == null) {
|
|
valueToComments = new TreeMap();
|
|
propertyToValueToComments.put(property, valueToComments);
|
|
}
|
|
valueToComments.put(value, comments);
|
|
if (DEBUG && property.equals("BidiClass")) {
|
|
getValueComments(property, value);
|
|
}
|
|
}
|
|
|
|
private void showPVC(String property, String value, String comments) {
|
|
System.out.println(
|
|
"Putting Property: <"
|
|
+ property
|
|
+ ">, Value: <"
|
|
+ value
|
|
+ ">, Comments: <"
|
|
+ comments + ">");
|
|
}
|
|
|
|
String getValueComments(String property, String value) {
|
|
TreeMap valueToComments =
|
|
(TreeMap) propertyToValueToComments.get(property);
|
|
String result = null;
|
|
if (valueToComments != null)
|
|
result = (String) valueToComments.get(value);
|
|
if (DEBUG) System.out.println("Getting Property: <" + property + ">, Value: <"
|
|
+ value + ">, Comment: <" + result + ">");
|
|
return result;
|
|
}
|
|
|
|
Map getValue2CommentsMap(String property) {
|
|
return (Map) propertyToValueToComments.get(property);
|
|
}
|
|
|
|
static String afterEquals(String source) {
|
|
return source.substring(source.indexOf('=') + 1);
|
|
}
|
|
|
|
static String afterWhitespace(String source) {
|
|
// Note: don't need to be international
|
|
for (int i = 0; i < source.length(); ++i) {
|
|
char ch = source.charAt(i);
|
|
if (Character.isWhitespace(source.charAt(i))) {
|
|
return source.substring(i).trim();
|
|
}
|
|
}
|
|
return "";
|
|
}
|
|
|
|
/*private void add(String name, String[] properties) {
|
|
fileToPropertySet.put(name, properties);
|
|
}*/
|
|
|
|
private void build() {
|
|
/*
|
|
for (int i = 0; i < hackNameList.length; ++i) {
|
|
String item = hackNameList[i];
|
|
String regularItem = UnicodeProperty.regularize(item,true);
|
|
hackMap.put(regularItem, item);
|
|
}
|
|
*/
|
|
|
|
/*
|
|
for (int i = 0; i < UCD_Names.UNIFIED_PROPERTIES.length; ++i) {
|
|
String name = Utility.getUnskeleton(UCD_Names.UNIFIED_PROPERTIES[i], false);
|
|
valueComments.add(name, "*", "# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[i]);
|
|
System.out.println();
|
|
System.out.println(name);
|
|
System.out.println("# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[i]);
|
|
}
|
|
// HACK
|
|
valueComments.add("Bidi_Mirroring", "*", "# " + UCD_Names.UNIFIED_PROPERTY_HEADERS[9]);
|
|
*/
|
|
try {
|
|
BufferedReader br =
|
|
Utility.openReadFile("com/ibm/text/UCD/MakeUnicodeFiles.txt", Utility.UTF8);
|
|
String key = null;
|
|
String file = null, property = null, value = "", comments = "";
|
|
while (true) {
|
|
String line = br.readLine();
|
|
if (line == null)
|
|
break;
|
|
line = line.trim();
|
|
if (line.length() == 0)
|
|
continue;
|
|
if (DEBUG)
|
|
System.out.println("\t" + line);
|
|
String lineValue = afterWhitespace(line);
|
|
if (line.startsWith("Format:")) {
|
|
addPrintStyle(property + " " + lineValue); // fix later
|
|
} else if (line.startsWith("#")) {
|
|
if (comments.length() != 0) comments += "\n";
|
|
comments += line;
|
|
} else {
|
|
// end of comments, roll up
|
|
if (property != null)
|
|
addValueComments(property, value, comments);
|
|
comments = "";
|
|
if (line.startsWith("Generate:")) {
|
|
filesToDo = Utility.split(lineValue.trim(), ' ');
|
|
if (filesToDo.length == 0
|
|
|| (filesToDo.length == 1 && filesToDo[0].length() == 0)) {
|
|
filesToDo = new String[] {".*"};
|
|
}
|
|
} else if (line.startsWith("DeltaVersion:")) {
|
|
dVersion = Integer.parseInt(lineValue);
|
|
} else if (line.startsWith("CopyrightYear:")) {
|
|
Default.setYear(lineValue);
|
|
} else if (line.startsWith("File:")) {
|
|
int p2 = lineValue.lastIndexOf('/');
|
|
file = lineValue.substring(p2+1);
|
|
if (p2 >= 0) {
|
|
fileToDirectory.put(file, lineValue.substring(0,p2+1));
|
|
}
|
|
property = null;
|
|
} else if (line.startsWith("Property:")) {
|
|
property = lineValue;
|
|
addPropertyToFile(file, property);
|
|
value = "";
|
|
} else if (line.startsWith("Value:")) {
|
|
value = lineValue;
|
|
} else if (line.startsWith("HackName:")) {
|
|
String regularItem =
|
|
UnicodeProperty.regularize(lineValue, true);
|
|
hackMap.put(regularItem, lineValue);
|
|
} else if (line.startsWith("FinalComments")) {
|
|
break;
|
|
} else {
|
|
throw new IllegalArgumentException("Unknown command: " + line);
|
|
}
|
|
}
|
|
}
|
|
br.close();
|
|
} catch (IOException e) {
|
|
// TODO Auto-generated catch block
|
|
e.printStackTrace();
|
|
throw new IllegalArgumentException("File missing");
|
|
}
|
|
hackMapFilter = new UnicodeProperty.MapFilter(hackMap);
|
|
/*
|
|
add("PropertyValueAliases", null);
|
|
add("PropertyAliases", null);
|
|
add("SpecialCasing", null);
|
|
add("NormalizationTest", null);
|
|
add("StandardizedVariants", null);
|
|
add("CaseFolding", null);
|
|
add("DerivedAge", new String[] {"Age"});
|
|
add("Scripts", new String[] {"Script"});
|
|
add("HangulSyllableType", new String[] {"HangulSyllableType"});
|
|
add("DerivedBidiClass", new String[] {"BidiClass"});
|
|
add("DerivedBinaryProperties", new String[] {"BidiMirrored"});
|
|
add("DerivedCombiningClass", new String[] {"CanonicalCombiningClass"});
|
|
add("DerivedDecompositionType", new String[] {"DecompositionType"});
|
|
add("DerivedEastAsianWidth", new String[] {"EastAsianWidth"});
|
|
add("DerivedGeneralCategory", new String[] {"GeneralCategory"});
|
|
add("DerivedJoiningGroup", new String[] {"JoiningGroup"});
|
|
add("DerivedJoiningType", new String[] {"JoiningType"});
|
|
add("DerivedLineBreak", new String[] {"LineBreak"});
|
|
add("DerivedNumericType", new String[] {"NumericType"});
|
|
add("DerivedNumericValues", new String[] {"NumericValue"});
|
|
add("PropList", new String[] {
|
|
"White_Space", "Bidi_Control", "Join_Control",
|
|
"Dash", "Hyphen", "Quotation_Mark",
|
|
"Terminal_Punctuation", "Other_Math",
|
|
"Hex_Digit", "ASCII_Hex_Digit",
|
|
"Other_Alphabetic",
|
|
"Ideographic",
|
|
"Diacritic", "Extender",
|
|
"Other_Lowercase", "Other_Uppercase",
|
|
"Noncharacter_Code_Point",
|
|
"Other_Grapheme_Extend",
|
|
"Grapheme_Link",
|
|
"IDS_Binary_Operator", "IDS_Trinary_Operator",
|
|
"Radical", "Unified_Ideograph",
|
|
"Other_Default_Ignorable_Code_Point",
|
|
"Deprecated", "Soft_Dotted",
|
|
"Logical_Order_Exception",
|
|
"Other_ID_Start"
|
|
});
|
|
add("DerivedCoreProperties", new String[] {
|
|
"Math", "Alphabetic", "Lowercase", "Uppercase",
|
|
"ID_Start", "ID_Continue",
|
|
"XID_Start", "XID_Continue",
|
|
"Default_Ignorable_Code_Point",
|
|
"Grapheme_Extend", "Grapheme_Base"
|
|
});
|
|
add("DerivedNormalizationProps", new String[] {
|
|
"FC_NFKC_Closure",
|
|
"Full_Composition_Exclusion",
|
|
"NFD_QuickCheck", "NFC_QuickCheck", "NFKD_QuickCheck", "NFKC_QuickCheck",
|
|
"Expands_On_NFD", "Expands_On_NFC", "Expands_On_NFKD", "Expands_On_NFKC"
|
|
});
|
|
*/
|
|
write();
|
|
}
|
|
|
|
private void write() {
|
|
TreeMap fileoptions = new TreeMap();
|
|
/*for (int i = 0; i < FILE_OPTIONS.length; ++i) {
|
|
String s = FILE_OPTIONS[i];
|
|
int pos = s.indexOf(' ');
|
|
String name = s.substring(0,pos);
|
|
String options = s.substring(pos).trim();
|
|
fileoptions.put(name, options);
|
|
}
|
|
*/
|
|
for (Iterator it = fileToPropertySet.keySet().iterator(); it.hasNext();) {
|
|
String key = (String) it.next();
|
|
if (DEBUG) {
|
|
System.out.println();
|
|
System.out.println("File:\t" + key);
|
|
}
|
|
List propList2 = (List) fileToPropertySet.get(key);
|
|
if (propList2 == null) {
|
|
System.out.println("SPECIAL");
|
|
continue;
|
|
}
|
|
for (Iterator pIt = propList2.iterator(); pIt.hasNext();) {
|
|
String prop = (String) pIt.next();
|
|
String options = (String)fileoptions.get(prop);
|
|
if (DEBUG) {
|
|
System.out.println();
|
|
System.out.println("Property:\t" + prop);
|
|
if (options != null) {
|
|
System.out.println("Format:\t" + options);
|
|
}
|
|
}
|
|
Map vc = getValue2CommentsMap(prop);
|
|
if (vc == null) continue;
|
|
for (Iterator it2 = vc.keySet().iterator(); it2.hasNext();) {
|
|
String value = (String) it2.next();
|
|
String comment = (String) vc.get(value);
|
|
if (DEBUG) {
|
|
if (!value.equals("")) {
|
|
System.out.println("Value:\t" + value);
|
|
}
|
|
System.out.println(comment);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
private void addCommentToFile(String filename, String comment) {
|
|
fileToComments.put(filename, comment);
|
|
}
|
|
|
|
private void addPropertyToFile(String filename, String property) {
|
|
List properties = (List) fileToPropertySet.get(filename);
|
|
if (properties == null) {
|
|
properties = new ArrayList(1);
|
|
fileToPropertySet.put(filename, properties);
|
|
}
|
|
properties.add(property);
|
|
}
|
|
public List getPropertiesFromFile(String filename) {
|
|
return (List) fileToPropertySet.get(filename);
|
|
}
|
|
public Set getFiles() {
|
|
return fileToPropertySet.keySet();
|
|
}
|
|
}
|
|
|
|
public static void generateFile() throws IOException {
|
|
for (int i = 0; i < Format.theFormat.filesToDo.length; ++i) {
|
|
String fileNamePattern =
|
|
Format.theFormat.filesToDo[i].trim(); // .toLowerCase(Locale.ENGLISH);
|
|
Matcher matcher = Pattern.compile(fileNamePattern, Pattern.CASE_INSENSITIVE).matcher("");
|
|
Iterator it = Format.theFormat.getFiles().iterator();
|
|
boolean gotOne = false;
|
|
while (it.hasNext()) {
|
|
String propname = (String) it.next();
|
|
if (!matcher.reset(propname).matches()) continue;
|
|
//if (!propname.toLowerCase(Locale.ENGLISH).startsWith(fileName)) continue;
|
|
generateFile(propname);
|
|
gotOne = true;
|
|
}
|
|
if (!gotOne) {
|
|
throw new IllegalArgumentException(
|
|
"Non-matching file name: " + fileNamePattern);
|
|
}
|
|
}
|
|
}
|
|
|
|
public static void generateFile(String filename) throws IOException {
|
|
if (filename.endsWith("Aliases")) {
|
|
if (filename.endsWith("ValueAliases")) generateValueAliasFile(filename);
|
|
else generateAliasFile(filename);
|
|
} else if (filename.equals("NormalizationTest")) {
|
|
GenerateData.writeNormalizerTestSuite("DerivedData/", "NormalizationTest");
|
|
} else if (filename.equals("CaseFolding")) {
|
|
GenerateCaseFolding.makeCaseFold(false);
|
|
} else if (filename.equals("SpecialCasing")) {
|
|
GenerateCaseFolding.generateSpecialCasing(false);
|
|
} else if (filename.equals("StandardizedVariants")) {
|
|
GenerateStandardizedVariants.generate();
|
|
} else if (filename.equals("NamedSequences")) {
|
|
GenerateNamedSequences.generate();
|
|
} else if (filename.equals("GraphemeBreakTest")) {
|
|
new GenerateGraphemeBreakTest(Default.ucd()).run();
|
|
} else if (filename.equals("WordBreakTest")) {
|
|
new GenerateWordBreakTest(Default.ucd()).run();
|
|
} else if (filename.equals("LineBreakTest")) {
|
|
new GenerateLineBreakTest(Default.ucd()).run();
|
|
} else if (filename.equals("SentenceBreakTest")) {
|
|
new GenerateSentenceBreakTest(Default.ucd()).run();
|
|
} else {
|
|
generatePropertyFile(filename);
|
|
}
|
|
}
|
|
|
|
static final String SEPARATOR = "# ================================================";
|
|
|
|
public static void generateAliasFile(String filename) throws IOException {
|
|
UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedData/", filename);
|
|
PrintWriter pw = udf.out;
|
|
UnicodeProperty.Factory ups
|
|
= ToolUnicodePropertySource.make(Default.ucdVersion());
|
|
TreeSet sortedSet = new TreeSet(CASELESS_COMPARATOR);
|
|
BagFormatter bf = new BagFormatter();
|
|
Tabber.MonoTabber mt = new Tabber.MonoTabber()
|
|
.add(10,Tabber.LEFT)
|
|
.add(30,Tabber.LEFT);
|
|
int count = 0;
|
|
|
|
for (int i = UnicodeProperty.LIMIT_TYPE - 1; i >= UnicodeProperty.BINARY; --i) {
|
|
if ((i & UnicodeProperty.EXTENDED_MASK) != 0) continue;
|
|
List list = ups.getAvailableNames(1<<i);
|
|
//if (list.size() == 0) continue;
|
|
sortedSet.clear();
|
|
StringBuffer buffer = new StringBuffer();
|
|
for (Iterator it = list.iterator(); it.hasNext();) {
|
|
String propAlias = (String)it.next();
|
|
|
|
UnicodeProperty up = ups.getProperty(propAlias);
|
|
List aliases = up.getNameAliases();
|
|
if (aliases.size() == 1) {
|
|
sortedSet.add(mt.process(aliases.get(0) + "\t; " + aliases.get(0)));
|
|
} else {
|
|
buffer.setLength(0);
|
|
boolean isFirst = true;
|
|
for (Iterator it2 = aliases.iterator(); it2.hasNext();) {
|
|
if (isFirst) isFirst = false;
|
|
else buffer.append("\t; ");
|
|
buffer.append(it2.next());
|
|
}
|
|
if (aliases.size() == 1) {
|
|
// repeat
|
|
buffer.append("\t; ").append(aliases.get(0));
|
|
}
|
|
sortedSet.add(mt.process(buffer.toString()));
|
|
}
|
|
}
|
|
if (i == UnicodeProperty.STRING) {
|
|
for (int j = 0; j < specialString.length; ++j) {
|
|
sortedSet.add(mt.process(specialString[j]));
|
|
}
|
|
} else if (i == UnicodeProperty.MISC) {
|
|
for (int j = 0; j < specialMisc.length; ++j) {
|
|
sortedSet.add(mt.process(specialMisc[j]));
|
|
}
|
|
}
|
|
pw.println();
|
|
pw.println(SEPARATOR);
|
|
pw.println("# " + UnicodeProperty.getTypeName(i) + " Properties");
|
|
pw.println(SEPARATOR);
|
|
for (Iterator it = sortedSet.iterator(); it.hasNext();) {
|
|
pw.println(it.next());
|
|
count++;
|
|
}
|
|
}
|
|
pw.println();
|
|
pw.println(SEPARATOR);
|
|
pw.println("# Total: " + count);
|
|
pw.println();
|
|
pw.println("# EOF");
|
|
udf.close();
|
|
}
|
|
|
|
static String[] specialMisc = {
|
|
"isc\t; ISO_Comment",
|
|
"na1\t; Unicode_1_Name",
|
|
"URS\t; Unicode_Radical_Stroke"};
|
|
|
|
static String[] specialString = {
|
|
"dm\t; Decomposition_Mapping",
|
|
"lc\t; Lowercase_Mapping",
|
|
"scc\t; Special_Case_Condition",
|
|
"sfc\t; Simple_Case_Folding",
|
|
"slc\t; Simple_Lowercase_Mapping",
|
|
"stc\t; Simple_Titlecase_Mapping",
|
|
"suc\t; Simple_Uppercase_Mapping",
|
|
"tc\t; Titlecase_Mapping",
|
|
"uc\t; Uppercase_Mapping"};
|
|
|
|
static String[] specialGC = {
|
|
"gc\t;\tC\t;\tOther\t# Cc | Cf | Cn | Co | Cs",
|
|
"gc\t;\tL\t;\tLetter\t# Ll | Lm | Lo | Lt | Lu",
|
|
"gc\t;\tLC\t;\tCased_Letter\t# Ll | Lt | Lu",
|
|
"gc\t;\tM\t;\tMark\t# Mc | Me | Mn",
|
|
"gc\t;\tN\t;\tNumber\t# Nd | Nl | No",
|
|
"gc\t;\tP\t;\tPunctuation\t;\tpunct\t# Pc | Pd | Pe | Pf | Pi | Po | Ps",
|
|
"gc\t;\tS\t;\tSymbol\t# Sc | Sk | Sm | So",
|
|
"gc\t;\tZ\t;\tSeparator\t# Zl | Zp | Zs"};
|
|
|
|
public static void generateValueAliasFile(String filename) throws IOException {
|
|
UnicodeDataFile udf = UnicodeDataFile.openAndWriteHeader("DerivedData/", filename);
|
|
PrintWriter pw = udf.out;
|
|
Format.theFormat.printFileComments(pw, filename);
|
|
UnicodeProperty.Factory toolFactory
|
|
= ToolUnicodePropertySource.make(Default.ucdVersion());
|
|
BagFormatter bf = new BagFormatter(toolFactory);
|
|
StringBuffer buffer = new StringBuffer();
|
|
Set sortedSet = new TreeSet(CASELESS_COMPARATOR);
|
|
|
|
//gc ; C ; Other # Cc | Cf | Cn | Co | Cs
|
|
// 123456789012345678901234567890123
|
|
|
|
// sc ; Arab ; Arabic
|
|
Tabber.MonoTabber mt2 = new Tabber.MonoTabber()
|
|
.add(3,Tabber.LEFT)
|
|
.add(2,Tabber.LEFT) // ;
|
|
.add(10,Tabber.LEFT)
|
|
.add(2,Tabber.LEFT) // ;
|
|
.add(33,Tabber.LEFT)
|
|
.add(2,Tabber.LEFT) // ;
|
|
.add(33,Tabber.LEFT);
|
|
|
|
// ccc; 216; ATAR ; Attached_Above_Right
|
|
Tabber.MonoTabber mt3 = new Tabber.MonoTabber()
|
|
.add(3,Tabber.LEFT)
|
|
.add(2,Tabber.LEFT) // ;
|
|
.add(3,Tabber.RIGHT)
|
|
.add(2,Tabber.LEFT) // ;
|
|
.add(5,Tabber.LEFT)
|
|
.add(2,Tabber.LEFT) // ;
|
|
.add(33,Tabber.LEFT)
|
|
.add(2,Tabber.LEFT) // ;
|
|
.add(33,Tabber.LEFT);
|
|
|
|
for (Iterator it = toolFactory.getAvailableNames(UnicodeProperty.ENUMERATED_OR_CATALOG_MASK).iterator(); it.hasNext();) {
|
|
String propName = (String) it.next();
|
|
UnicodeProperty up = toolFactory.getProperty(propName);
|
|
String shortProp = up.getFirstNameAlias();
|
|
sortedSet.clear();
|
|
|
|
for (Iterator it2 = up.getAvailableValues().iterator(); it2.hasNext();) {
|
|
String value = (String) it2.next();
|
|
List l = up.getValueAliases(value);
|
|
if (DEBUG) System.out.println(value + "\t" + bf.join(l));
|
|
|
|
// HACK
|
|
Tabber mt = mt2;
|
|
if (l.size() == 1) {
|
|
if (propName.equals("Canonical_Combining_Class")) continue;
|
|
if (propName.equals("Block")
|
|
|| propName.equals("Joining_Group")
|
|
//|| propName.equals("Numeric_Type")
|
|
|| propName.equals("Age")) {
|
|
l.add(0, "n/a");
|
|
} else {
|
|
l.add(0, l.get(0)); // double up
|
|
}
|
|
} else if (propName.equals("Canonical_Combining_Class")) {
|
|
mt = mt3;
|
|
}
|
|
if (UnicodeProperty.equalNames(value,"Cyrillic_Supplement")) {
|
|
l.add("Cyrillic_Supplementary");
|
|
}
|
|
|
|
buffer.setLength(0);
|
|
buffer.append(shortProp);
|
|
for (Iterator it3 = l.iterator(); it3.hasNext();) {
|
|
buffer.append("\t; \t" + it3.next());
|
|
}
|
|
|
|
sortedSet.add(mt.process(buffer.toString()));
|
|
}
|
|
// HACK
|
|
if (propName.equals("General_Category")) {
|
|
for (int i = 0; i < specialGC.length; ++i) {
|
|
sortedSet.add(mt2.process(specialGC[i]));
|
|
}
|
|
}
|
|
pw.println();
|
|
pw.println("# " + propName + " (" + shortProp + ")");
|
|
pw.println();
|
|
for (Iterator it4 = sortedSet.iterator(); it4.hasNext();) {
|
|
String line = (String) it4.next();
|
|
pw.println(line);
|
|
}
|
|
}
|
|
pw.println();
|
|
pw.println("# EOF");
|
|
udf.close();
|
|
}
|
|
|
|
public static void generatePropertyFile(String filename) throws IOException {
|
|
String dir = (String) Format.theFormat.fileToDirectory.get(filename);
|
|
if (dir == null) dir = "";
|
|
UnicodeDataFile udf =
|
|
UnicodeDataFile.openAndWriteHeader("DerivedData/" + dir, filename);
|
|
PrintWriter pw = udf.out;
|
|
// bf2.openUTF8Writer(UCD_Types.GEN_DIR, "Test" + filename + ".txt");
|
|
Format.theFormat.printFileComments(pw, filename);
|
|
UnicodeProperty.Factory toolFactory =
|
|
ToolUnicodePropertySource.make(Default.ucdVersion());
|
|
UnicodeSet unassigned =
|
|
toolFactory.getSet("gc=cn").addAll(toolFactory.getSet("gc=cs"));
|
|
//System.out.println(unassigned.toPattern(true));
|
|
// .removeAll(toolFactory.getSet("noncharactercodepoint=true"));
|
|
|
|
List propList = Format.theFormat.getPropertiesFromFile(filename);
|
|
for (Iterator propIt = propList.iterator(); propIt.hasNext();) {
|
|
BagFormatter bf = new BagFormatter(toolFactory);
|
|
String nextPropName = (String) propIt.next();
|
|
UnicodeProperty prop = toolFactory.getProperty(nextPropName);
|
|
String name = prop.getName();
|
|
System.out.println("Property: " + name + "; " + prop.getTypeName(prop.getType()));
|
|
pw.println();
|
|
pw.println(SEPARATOR);
|
|
String propComment = Format.theFormat.getValueComments(name, "");
|
|
if (propComment != null && propComment.length() != 0) {
|
|
pw.println();
|
|
pw.println(propComment);
|
|
} else if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
|
|
pw.println();
|
|
pw.println("# Property:\t" + name);
|
|
}
|
|
|
|
Format.PrintStyle ps = Format.theFormat.getPrintStyle(name);
|
|
if (DEBUG) System.out.println(ps.toString());
|
|
|
|
if (!prop.isType(UnicodeProperty.BINARY_MASK) &&
|
|
(ps.skipUnassigned != null || ps.skipValue != null)) {
|
|
String v = ps.skipValue;
|
|
if (v == null) {
|
|
v = ps.skipUnassigned;
|
|
}
|
|
String v2 = prop.getFirstValueAlias(v);
|
|
if (UnicodeProperty.compareNames(v,v2) != 0) v = v + " (" + v2 + ")";
|
|
pw.println();
|
|
pw.println("# All code points not explicitly listed for " + prop.getName());
|
|
pw.println("# have the value " + v + ".");
|
|
}
|
|
|
|
if (!ps.interleaveValues && prop.isType(UnicodeProperty.BINARY_MASK)) {
|
|
if (DEBUG) System.out.println("Resetting Binary Values");
|
|
ps.skipValue = "False";
|
|
if (ps.nameStyle.equals("none")) ps.nameStyle = "long";
|
|
ps.valueStyle = "none";
|
|
}
|
|
|
|
if (ps.noLabel) {
|
|
bf.setLabelSource(null);
|
|
}
|
|
if (ps.nameStyle.equals("none")) {
|
|
bf.setPropName(null);
|
|
} else if (ps.nameStyle.equals("short")) {
|
|
bf.setPropName(prop.getFirstNameAlias());
|
|
} else {
|
|
bf.setPropName(name);
|
|
}
|
|
|
|
if (ps.interleaveValues) {
|
|
writeInterleavedValues(pw, bf, prop, ps);
|
|
} else if (prop.isType(UnicodeProperty.STRING_OR_MISC_MASK)) {
|
|
writeStringValues(pw, bf, prop);
|
|
//} else if (prop.isType(UnicodeProperty.BINARY_MASK)) {
|
|
// writeBinaryValues(pw, bf, prop);
|
|
} else {
|
|
writeEnumeratedValues(pw, bf, unassigned, prop, ps);
|
|
}
|
|
}
|
|
pw.println();
|
|
pw.println("# EOF");
|
|
udf.close();
|
|
}
|
|
|
|
private static void writeEnumeratedValues(
|
|
PrintWriter pw,
|
|
BagFormatter bf,
|
|
UnicodeSet unassigned,
|
|
UnicodeProperty prop,
|
|
Format.PrintStyle ps) {
|
|
if (DEBUG) System.out.println("Writing Enumerated Values: " + prop.getName());
|
|
|
|
bf.setValueSource(new UnicodeProperty.FilteredProperty(prop, Format.theFormat.hackMapFilter));
|
|
Collection aliases = prop.getAvailableValues();
|
|
if (ps.orderByRangeStart) {
|
|
if (DEBUG) System.out.println("Reordering");
|
|
TreeSet temp2 = new TreeSet(new RangeStartComparator(prop));
|
|
temp2.addAll(aliases);
|
|
aliases = temp2;
|
|
}
|
|
if (ps.sortNumeric) {
|
|
if (DEBUG) System.out.println("Reordering");
|
|
TreeSet temp2 = new TreeSet(NUMERIC_STRING_COMPARATOR);
|
|
temp2.addAll(aliases);
|
|
aliases = temp2;
|
|
}
|
|
System.out.println("Check: " + prop.getValue(0xE000));
|
|
String missing = ps.skipUnassigned != null ? ps.skipUnassigned : ps.skipValue;
|
|
if (missing != null && !missing.equals("False")) {
|
|
pw.println();
|
|
String propName = bf.getPropName();
|
|
if (propName == null) propName = "";
|
|
else if (propName.length() != 0) propName = propName + "; ";
|
|
pw.println("# @missing: 0000..10FFFF; " + propName + missing);
|
|
}
|
|
for (Iterator it = aliases.iterator(); it.hasNext();) {
|
|
String value = (String)it.next();
|
|
if (DEBUG) System.out.println("Getting value " + value);
|
|
UnicodeSet s = prop.getSet(value);
|
|
String valueComment = Format.theFormat.getValueComments(prop.getName(), value);
|
|
|
|
if (DEBUG) {
|
|
System.out.println("Value:\t" + value + "\t" + prop.getFirstValueAlias(value) + "\tskip:" + ps.skipValue);
|
|
System.out.println("Value Comment\t" + valueComment);
|
|
System.out.println(s.toPattern(true));
|
|
}
|
|
|
|
int totalSize = s.size();
|
|
if (s.size() == 0) {
|
|
System.out.println("\tSkipping Empty: " + prop.getName() + "=" + value);
|
|
continue;
|
|
}
|
|
|
|
if (UnicodeProperty.compareNames(value, ps.skipValue) == 0) {
|
|
if (DEBUG) System.out.println("Skipping: " + value);
|
|
continue;
|
|
}
|
|
|
|
if (UnicodeProperty.compareNames(value, ps.skipUnassigned) == 0) {
|
|
bf.setFullTotal(s.size());
|
|
if (DEBUG) System.out.println("Removing Unassigneds: " + value);
|
|
s.removeAll(unassigned);
|
|
}
|
|
|
|
//if (s.size() == 0) continue;
|
|
//if (unassigned.containsAll(s)) continue; // skip if all unassigned
|
|
//if (s.contains(0xD0000)) continue; // skip unassigned
|
|
|
|
boolean nonLongValue = false;
|
|
String displayValue = value;
|
|
if (ps.valueStyle.equals("none")) {
|
|
displayValue = null;
|
|
nonLongValue = true;
|
|
} else if (ps.valueStyle.equals("short")) {
|
|
displayValue = prop.getFirstValueAlias(displayValue);
|
|
nonLongValue = true;
|
|
if (DEBUG) System.out.println("Changing value " + displayValue);
|
|
}
|
|
if (ps.makeUppercase && displayValue != null) {
|
|
displayValue = displayValue.toUpperCase(Locale.ENGLISH);
|
|
if (DEBUG) System.out.println("Changing value2 " + displayValue);
|
|
}
|
|
if (ps.makeFirstLetterLowercase && displayValue != null) {
|
|
// NOTE: this is ok since we are only working in ASCII
|
|
displayValue = displayValue.substring(0,1).toLowerCase(Locale.ENGLISH)
|
|
+ displayValue.substring(1);
|
|
if (DEBUG) System.out.println("Changing value2 " + displayValue);
|
|
}
|
|
if (DEBUG) System.out.println("Setting value " + displayValue);
|
|
bf.setValueSource(displayValue);
|
|
|
|
if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
|
|
pw.println();
|
|
pw.println(SEPARATOR);
|
|
if (nonLongValue) {
|
|
pw.println();
|
|
pw.println("# " + prop.getName() + "=" + value);
|
|
}
|
|
}
|
|
|
|
if (valueComment != null) {
|
|
pw.println();
|
|
pw.println(valueComment);
|
|
}
|
|
if (false && ps.longValueHeading != null) {
|
|
String headingValue = value;
|
|
if (ps.longValueHeading == "ccc") {
|
|
headingValue = Utility.replace(value, "_", "");
|
|
char c = headingValue.charAt(0);
|
|
if ('0' <= c && c <= '9') headingValue = "Other Combining Class";
|
|
}
|
|
pw.println();
|
|
pw.println("# " + headingValue);
|
|
}
|
|
pw.println();
|
|
//if (s.size() != 0)
|
|
bf.showSetNames(pw, s);
|
|
//System.out.println(bf.showSetNames(s));
|
|
}
|
|
|
|
}
|
|
//static NumberFormat nf = NumberFormat.getInstance();
|
|
static Comparator NUMERIC_STRING_COMPARATOR = new Comparator() {
|
|
public int compare(Object o1, Object o2) {
|
|
if (o1 == o2) return 0;
|
|
if (o1 == null) return -1;
|
|
if (o2 == null) return 1;
|
|
return Double.compare(
|
|
Double.parseDouble((String) o1),
|
|
Double.parseDouble((String) o2));
|
|
}
|
|
|
|
};
|
|
/*
|
|
private static void writeBinaryValues(
|
|
PrintWriter pw,
|
|
BagFormatter bf,
|
|
UnicodeProperty prop) {
|
|
if (DEBUG) System.out.println("Writing Binary Values: " + prop.getName());
|
|
UnicodeSet s = prop.getSet("True");
|
|
bf.setValueSource(prop.getName());
|
|
bf.showSetNames(pw, s);
|
|
}
|
|
*/
|
|
|
|
private static void writeInterleavedValues(
|
|
PrintWriter pw,
|
|
BagFormatter bf,
|
|
UnicodeProperty prop, PrintStyle ps) {
|
|
if (DEBUG) System.out.println("Writing Interleaved Values: " + prop.getName());
|
|
pw.println();
|
|
bf.setValueSource(new UnicodeProperty.FilteredProperty(prop, new RestoreSpacesFilter(ps)))
|
|
.setNameSource(null)
|
|
.setLabelSource(null)
|
|
.setRangeBreakSource(null)
|
|
.setShowCount(false)
|
|
.showSetNames(pw,new UnicodeSet(0,0x10FFFF));
|
|
}
|
|
|
|
private static void writeStringValues(
|
|
PrintWriter pw,
|
|
BagFormatter bf,
|
|
UnicodeProperty prop) {
|
|
if (DEBUG) System.out.println("Writing String Values: " + prop.getName());
|
|
pw.println();
|
|
bf.setValueSource(prop)
|
|
.setHexValue(true)
|
|
.setMergeRanges(false)
|
|
.showSetNames(pw,new UnicodeSet(0,0x10FFFF));
|
|
}
|
|
|
|
static class RangeStartComparator implements Comparator {
|
|
UnicodeProperty prop;
|
|
CompareProperties.UnicodeSetComparator comp = new CompareProperties.UnicodeSetComparator();
|
|
RangeStartComparator(UnicodeProperty prop) {
|
|
this.prop = prop;
|
|
}
|
|
public int compare(Object o1, Object o2) {
|
|
UnicodeSet s1 = prop.getSet((String)o1);
|
|
UnicodeSet s2 = prop.getSet((String)o2);
|
|
if (true) System.out.println("comparing " + o1 + ", " + o2
|
|
+ s1.toPattern(true) + "?" + s2.toPattern(true)
|
|
+ ", " + comp.compare(s1, s2));
|
|
return comp.compare(s1, s2);
|
|
}
|
|
|
|
}
|
|
|
|
static class RestoreSpacesFilter extends UnicodeProperty.StringFilter {
|
|
String skipValue;
|
|
/**
|
|
* @param ps
|
|
*/
|
|
public RestoreSpacesFilter(PrintStyle ps) {
|
|
skipValue = ps.skipValue;
|
|
if (skipValue == null) skipValue = ps.skipUnassigned;
|
|
}
|
|
|
|
public String remap(String original) {
|
|
// ok, because doesn't change length
|
|
String mod = (String) Format.theFormat.hackMap.get(original);
|
|
if (mod != null) original = mod;
|
|
if (original.equals(skipValue)) return null;
|
|
return original.replace('_',' ');
|
|
}
|
|
}
|
|
|
|
static Comparator CASELESS_COMPARATOR = new Comparator() {
|
|
public int compare(Object o1, Object o2) {
|
|
String s = o1.toString();
|
|
String t = o2.toString();
|
|
return s.compareToIgnoreCase(t);
|
|
}
|
|
};
|
|
|
|
public static void showDiff() throws IOException {
|
|
PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "propertyDifference.txt");
|
|
try {
|
|
showDifferences(out, "4.0.1", "LB", "GC");
|
|
showDifferences(out, "4.0.1", "East Asian Width", "LB");
|
|
showDifferences(out, "4.0.1", "East Asian Width", "GC");
|
|
} finally {
|
|
out.close();
|
|
}
|
|
}
|
|
|
|
public static void showAllDiff() throws IOException {
|
|
PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "propertyDifference.txt");
|
|
try {
|
|
UnicodeProperty.Factory fac = ToolUnicodePropertySource.make("4.0.1");
|
|
List props = fac.getAvailableNames(
|
|
(1<<UnicodeProperty.BINARY)
|
|
| (1<<UnicodeProperty.ENUMERATED)
|
|
//| (1<<UnicodeProperty.CATALOG)
|
|
);
|
|
Set skipList = new HashSet();
|
|
skipList.add("Age");
|
|
skipList.add("Joining_Group");
|
|
skipList.add("Canonical_Combining_Class");
|
|
|
|
for (Iterator it = props.iterator(); it.hasNext();) {
|
|
String prop1 = (String) it.next();
|
|
for (Iterator it2 = props.iterator(); it2.hasNext();) {
|
|
String prop2 = (String) it2.next();
|
|
if (prop1.equals(prop2)) continue;
|
|
if (skipList.contains(prop2)) continue;
|
|
System.out.println(prop1 + " vs. " + prop2);
|
|
showDifferences(out, fac.getProperty(prop1), fac.getProperty(prop2), false);
|
|
out.flush();
|
|
}
|
|
}
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
} finally {
|
|
out.close();
|
|
}
|
|
}
|
|
|
|
static NumberFormat nf = NumberFormat.getIntegerInstance(Locale.ENGLISH);
|
|
|
|
|
|
static void showDifferences(PrintWriter out, String version, String prop1, String prop2) throws IOException {
|
|
UnicodeProperty p1 = ToolUnicodePropertySource.make(version).getProperty(prop1);
|
|
UnicodeProperty p2 = ToolUnicodePropertySource.make(version).getProperty(prop2);
|
|
showDifferences(out, p1, p2, true);
|
|
}
|
|
|
|
static void showDifferences(PrintWriter out, UnicodeProperty p1, UnicodeProperty p2, boolean doOverlaps) throws IOException {
|
|
BagFormatter bf = new BagFormatter();
|
|
//out.println("Comparing " + p1.getName() + " and " + p2.getName());
|
|
System.out.println("Comparing " + p1.getName() + " and " + p2.getName());
|
|
String pn1 = '$' + p1.getName();
|
|
String pn2 = '$' + p2.getName();
|
|
UnicodeSet intersection = new UnicodeSet();
|
|
UnicodeSet disjoint = new UnicodeSet();
|
|
String skip1 = p1.getValue(0xEFFFD);
|
|
String skip2 = p2.getValue(0xEFFFD);
|
|
main:
|
|
for (Iterator it1 = p1.getAvailableValues().iterator(); it1.hasNext();) {
|
|
String v1 = (String)it1.next();
|
|
if (v1.equals(skip1)) continue;
|
|
UnicodeSet s1 = p1.getSet(v1);
|
|
if (s1.size() == 0) continue;
|
|
String pv1 = pn1 + (v1.equals("True") ? "" : ":" + v1);
|
|
//v1 += " (" + p1.getFirstValueAlias(v1) + ")";
|
|
System.out.println(v1);
|
|
//out.println();
|
|
//out.println(v1 + " [" + nf.format(s1.size()) + "]");
|
|
|
|
// create some containers so that the output is organized reasonably
|
|
String contains = "";
|
|
String overlaps = "";
|
|
UnicodeSet containsSet = new UnicodeSet();
|
|
Set overlapsSet = new TreeSet();
|
|
for (Iterator it2 = p2.getAvailableValues().iterator(); it2.hasNext();) {
|
|
String v2 = (String)it2.next();
|
|
if (v2.equals(skip2)) continue;
|
|
UnicodeSet s2 = p2.getSet(v2);
|
|
if (s2.size() == 0) continue;
|
|
// v2 += "(" + p2.getFirstValueAlias(v2) + ")";
|
|
//v2 = p2.getFirstValueAlias(v2);
|
|
String pv2 = pn2 + (v2.equals("True") ? "" : ":" + v2);
|
|
if (s1.containsNone(s2)) continue;
|
|
if (s1.equals(s2)) {
|
|
out.println(pv1 + "\t= " + pv2);
|
|
continue main; // since they are partitions, we can stop here
|
|
} else if (s2.containsAll(s1)) {
|
|
// out.println(pv1 + "\t\u2282 " + pv2);
|
|
continue main; // partition, stop
|
|
} else if (s1.containsAll(s2)) {
|
|
if (contains.length() != 0) contains += " ";
|
|
contains += pv2;
|
|
containsSet.addAll(s2);
|
|
if (containsSet.size() == s1.size()) break;
|
|
} else if (doOverlaps) { // doesn't contain, isn't contained
|
|
if (overlaps.length() != 0) overlaps += "\r\n\t";
|
|
intersection.clear().addAll(s2).retainAll(s1);
|
|
disjoint.clear().addAll(s1).removeAll(s2);
|
|
overlaps += "\u2283 " + v2 + " [" + nf.format(intersection.size()) + "]"
|
|
+ " \u2285 " + v2 + " [" + nf.format(disjoint.size()) + "]";
|
|
overlapsSet.add(v2);
|
|
}
|
|
}
|
|
if (contains.length() != 0) {
|
|
out.println(pv1 + (containsSet.size() == s1.size() ? "\t= "
|
|
: "\t\u2283 ") + "[" + contains + "]");
|
|
}
|
|
if (overlaps.length() != 0) out.println("\t" + overlaps);
|
|
if (false && overlapsSet.size() != 0) {
|
|
out.println("\t\u2260\u2284\u2285");
|
|
for (Iterator it3 = overlapsSet.iterator(); it3.hasNext();) {
|
|
String v3 = (String) it3.next();
|
|
UnicodeSet s3 = p2.getSet(v3);
|
|
out.println("\t" + v3);
|
|
bf.showSetDifferences(out,v1,s1,v3,s3);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static class UnicodeDataHack extends UnicodeLabel {
|
|
private UnicodeProperty.Factory factory;
|
|
private UnicodeProperty name;
|
|
private UnicodeProperty bidiMirrored;
|
|
private UnicodeProperty numericValue;
|
|
private UnicodeProperty numericType;
|
|
private UnicodeProperty decompositionValue;
|
|
private UnicodeProperty decompositionType;
|
|
private UnicodeProperty bidiClass;
|
|
private UnicodeProperty combiningClass;
|
|
private UnicodeProperty category;
|
|
UnicodeDataHack(UnicodeProperty.Factory factory) {
|
|
this.factory = factory;
|
|
name = factory.getProperty("Name");
|
|
category = factory.getProperty("General_Category");
|
|
combiningClass = factory.getProperty("Canonical_Combining_Class");
|
|
bidiClass = factory.getProperty("Bidi_Class");
|
|
decompositionType = factory.getProperty("Decomposition_Type");
|
|
decompositionValue = factory.getProperty("Decomposition_Value");
|
|
numericType = factory.getProperty("Numeric_Type");
|
|
numericValue = factory.getProperty("Numeric_Value");
|
|
bidiMirrored = factory.getProperty("Bidi_Mirrored");
|
|
//name10
|
|
//isoComment
|
|
}
|
|
public String getValue(int codepoint, boolean isShort) {
|
|
String nameStr = name.getName();
|
|
if (nameStr.startsWith("<reserved")) return null;
|
|
String code = Utility.hex(codepoint);
|
|
int pos = nameStr.indexOf(code);
|
|
if (pos > 0) {
|
|
nameStr = nameStr.substring(0,pos) + "%" + nameStr.substring(pos + code.length());
|
|
}
|
|
nameStr += ";"
|
|
+ category.getValue(codepoint, true) + ";"
|
|
+ combiningClass.getValue(codepoint, true) + ";"
|
|
+ bidiClass.getValue(codepoint, true) + ";"
|
|
;
|
|
String temp = decompositionType.getValue(codepoint, true);
|
|
if (!temp.equals("None")) {
|
|
nameStr += "<" + temp + "> " + Utility.hex(decompositionValue.getValue(codepoint));
|
|
}
|
|
nameStr += ";";
|
|
temp = numericType.getValue(codepoint, true);
|
|
if (temp.equals("Decimal")) {
|
|
nameStr += temp + ";" + temp + ";" + temp + ";";
|
|
} else if (temp.equals("Digit")) {
|
|
nameStr += ";" + temp + ";" + temp + ";";
|
|
} else if (temp.equals("Numeric")) {
|
|
nameStr += ";;" + temp + ";";
|
|
} else if (temp.equals("Digit")) {
|
|
nameStr += ";;;";
|
|
}
|
|
if (bidiMirrored.getValue(codepoint, true).equals("True")) {
|
|
nameStr += "Y" + ";";
|
|
}
|
|
nameStr += ";";
|
|
return nameStr;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
static class PropertySymbolTable implements SymbolTable {
|
|
static boolean DEBUG = false;
|
|
UnicodeProperty.Factory factory;
|
|
//static Matcher identifier = Pattern.compile("([:letter:] [\\_\\-[:letter:][:number:]]*)").matcher("");
|
|
|
|
PropertySymbolTable (UnicodeProperty.Factory factory) {
|
|
this.factory = factory;
|
|
}
|
|
|
|
public char[] lookup(String s) {
|
|
if (DEBUG) System.out.println("\tLooking up " + s);
|
|
int pos = s.indexOf('=');
|
|
if (pos < 0) return null; // should never happen
|
|
UnicodeProperty prop = factory.getProperty(s.substring(0,pos));
|
|
if (prop == null) {
|
|
throw new IllegalArgumentException("Invalid Property: " + s + "\r\nUse "
|
|
+ showSet(factory.getAvailableNames()));
|
|
}
|
|
String value = s.substring(pos+1);
|
|
UnicodeSet set = prop.getSet(value);
|
|
if (set.size() == 0) {
|
|
throw new IllegalArgumentException("Empty Property-Value: " + s + "\r\nUse "
|
|
+ showSet(prop.getAvailableValues()));
|
|
}
|
|
if (DEBUG) System.out.println("\tReturning " + set.toPattern(true));
|
|
return set.toPattern(true).toCharArray(); // really ugly
|
|
}
|
|
|
|
private String showSet(List list) {
|
|
StringBuffer result = new StringBuffer("[");
|
|
boolean first = true;
|
|
for (Iterator it = list.iterator(); it.hasNext();) {
|
|
if (!first) result.append(", ");
|
|
else first = false;
|
|
result.append(it.next().toString());
|
|
}
|
|
result.append("]");
|
|
return result.toString();
|
|
}
|
|
|
|
public UnicodeMatcher lookupMatcher(int ch) {
|
|
return null;
|
|
}
|
|
|
|
public String parseReference(String text, ParsePosition pos, int limit) {
|
|
if (DEBUG) System.out.println("\tParsing <" + text.substring(pos.getIndex(),limit) + ">");
|
|
int start = pos.getIndex();
|
|
int i = getIdentifier(text, start, limit);
|
|
if (i == start) return null;
|
|
String prop = text.substring(start, i);
|
|
String value = "true";
|
|
if (i < limit) {
|
|
int cp = text.charAt(i);
|
|
if (cp == ':' || cp == '=') {
|
|
int j = getIdentifier(text, i+1, limit);
|
|
value = text.substring(i+1, j);
|
|
i = j;
|
|
}
|
|
}
|
|
pos.setIndex(i);
|
|
if (DEBUG) System.out.println("\tParsed <" + prop + ">=<" + value + ">");
|
|
return prop + '=' + value;
|
|
}
|
|
|
|
private int getIdentifier(String text, int start, int limit) {
|
|
if (DEBUG) System.out.println("\tGetID <" + text.substring(start,limit) + ">");
|
|
int cp = 0;
|
|
int i;
|
|
for (i = start; i < limit; i += UTF16.getCharCount(cp)) {
|
|
cp = UTF16.charAt(text, i);
|
|
if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp)) {
|
|
break;
|
|
}
|
|
}
|
|
if (DEBUG) System.out.println("\tGotID <" + text.substring(start,i) + ">");
|
|
return i;
|
|
}
|
|
|
|
};
|
|
|
|
/* getCombo(UnicodeProperty.Factory factory, String line) {
|
|
UnicodeSet result = new UnicodeSet();
|
|
String[] pieces = Utility.split(line, '+');
|
|
for (int i = 0; i < pieces.length; ++i) {
|
|
String[] parts = Utility.split(pieces[i],':');
|
|
String prop = parts[0].trim();
|
|
String value = "true";
|
|
if (parts.length > 1) value = parts[1].trim();
|
|
UnicodeProperty p = factory.getProperty(prop);
|
|
result.addAll(p.getSet(value));
|
|
}
|
|
return result;
|
|
}
|
|
*/
|
|
}
|
|
|
|
/*
|
|
static class OrderedMap {
|
|
HashMap map = new HashMap();
|
|
ArrayList keys = new ArrayList();
|
|
void put(Object o, Object t) {
|
|
map.put(o,t);
|
|
keys.add(o);
|
|
}
|
|
List keyset() {
|
|
return keys;
|
|
}
|
|
}
|
|
*/ |