package com.ibm.text.UCD; import java.io.BufferedReader; import java.io.IOException; import java.io.PrintWriter; import java.text.ParseException; import java.text.ParsePosition; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Locale; import com.ibm.icu.dev.test.util.BagFormatter; import com.ibm.icu.text.SymbolTable; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeMatcher; import com.ibm.icu.text.UnicodeSet; import com.ibm.text.utility.Utility; public class TestUnicodeInvariants { public static void main(String[] args) throws IOException { testInvariants(); } /** * Chain together several SymbolTables. * @author Davis */ static class ChainedSymbolTable implements SymbolTable { // TODO: add accessors? private List symbolTables; /** * Each SymbolTable is each accessed in order by the other methods, * so the first in the list is accessed first, etc. * @param symbolTables */ ChainedSymbolTable(SymbolTable[] symbolTables) { this.symbolTables = Arrays.asList(symbolTables); } public char[] lookup(String s) { for (Iterator it = symbolTables.iterator(); it.hasNext();) { SymbolTable st = (SymbolTable) it.next(); char[] result = st.lookup(s); if (result != null) return result; } return null; } public UnicodeMatcher lookupMatcher(int ch) { for (Iterator it = symbolTables.iterator(); it.hasNext();) { SymbolTable st = (SymbolTable) it.next(); UnicodeMatcher result = st.lookupMatcher(ch); if (result != null) return result; } return null; } // Warning: this depends on pos being left alone unless a string is returned!! public String parseReference(String text, ParsePosition pos, int limit) { for (Iterator it = symbolTables.iterator(); it.hasNext();) { SymbolTable st = (SymbolTable) it.next(); String result = st.parseReference(text, pos, limit); if (result != null) return result; } return null; } } static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\~ \\= \\! \\? \\< \\> \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]"); public static void testInvariants() throws IOException { String[][] variables = new String[100][2]; int variableCount = 0; PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt"); out.write('\uFEFF'); // BOM BufferedReader in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt"); BagFormatter bf = new BagFormatter(); bf.setUnicodePropertyFactory(ToolUnicodePropertySource.make("")); BagFormatter bf2 = new BagFormatter(); bf2.setUnicodePropertyFactory(ToolUnicodePropertySource.make("")); bf2.setMergeRanges(false); ChainedSymbolTable st = new ChainedSymbolTable(new SymbolTable[] { ToolUnicodePropertySource.make("4.0.0").getSymbolTable("\u00D7"), ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")}); ParsePosition pp = new ParsePosition(0); int parseErrorCount = 0; int testFailureCount = 0; while (true) { String line = in.readLine(); if (line == null) break; if (line.startsWith("\uFEFF")) line = line.substring(1); out.println(line); line = line.trim(); int pos = line.indexOf('#'); if (pos >= 0) line = line.substring(0,pos).trim(); if (line.length() == 0) continue; // fix all the variables String oldLine = line; line = Utility.replace(line, variables, variableCount); // detect variables if (line.startsWith("Let")) { int x = line.indexOf('='); variables[variableCount][0] = line.substring(3,x).trim(); variables[variableCount][1] = line.substring(x+1).trim(); variableCount++; if (false) System.out.println("Added variable: <" + variables[variableCount-1][0] + "><" + variables[variableCount-1][1] + ">"); continue; } // detect variables if (line.startsWith("Show")) { String part = line.substring(4).trim(); pp.setIndex(0); UnicodeSet leftSet = new UnicodeSet(part, pp, st); bf2.showSetNames(out, leftSet); continue; } char relation = 0; String rightSide = null; String leftSide = null; UnicodeSet leftSet = null; UnicodeSet rightSet = null; try { pp.setIndex(0); leftSet = new UnicodeSet(line, pp, st); leftSide = line.substring(0,pp.getIndex()); eatWhitespace(line, pp); relation = line.charAt(pp.getIndex()); if (!INVARIANT_RELATIONS.contains(relation)) { throw new ParseException("Invalid relation, must be one of " + INVARIANT_RELATIONS.toPattern(false), pp.getIndex()); } pp.setIndex(pp.getIndex()+1); // skip char eatWhitespace(line, pp); int start = pp.getIndex(); rightSet = new UnicodeSet(line, pp, st); rightSide = line.substring(start,pp.getIndex()); eatWhitespace(line, pp); if (line.length() != pp.getIndex()) { throw new ParseException("Extra characters at end", pp.getIndex()); } } catch (ParseException e) { out.println("PARSE ERROR:\t" + line.substring(0,e.getErrorOffset()) + "<@>" + line.substring(e.getErrorOffset())); out.println(); out.println("**** START Error Info ****"); out.println(e.getMessage()); out.println("**** END Error Info ****"); out.println(); parseErrorCount++; continue; } catch (IllegalArgumentException e) { out.println("PARSE ERROR:\t" + line); out.println(); out.println("**** START Error Info ****"); out.println(e.getMessage()); out.println("**** END Error Info ****"); out.println(); parseErrorCount++; continue; } boolean ok = true; switch(relation) { case '=': ok = leftSet.equals(rightSet); break; case '<': case '\u2282': ok = rightSet.containsAll(leftSet) && !leftSet.equals(rightSet); break; case '>': case '\u2283': ok = leftSet.containsAll(rightSet) && !leftSet.equals(rightSet); break; case '\u2264': case '\u2286': ok = rightSet.containsAll(leftSet); break; case '\u2265': case '\u2287': ok = leftSet.containsAll(rightSet); break; case '!': ok = leftSet.containsNone(rightSet); break; case '?': ok = !leftSet.equals(rightSet) && !leftSet.containsAll(rightSet) && !rightSet.containsAll(leftSet) && !leftSet.containsNone(rightSet); break; default: throw new IllegalArgumentException("Internal Error"); } if (ok) continue; out.println(); out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH)); out.println("**** START Error Info ****"); bf.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet); out.println("**** END Error Info ****"); out.println(); testFailureCount++; } out.println(); out.println("**** SUMMARY ****"); out.println(); out.println("ParseErrorCount=" + parseErrorCount); out.println("TestFailureCount=" + testFailureCount); out.close(); System.out.println("ParseErrorCount=" + parseErrorCount); System.out.println("TestFailureCount=" + testFailureCount); } /** * @param line * @param pp */ private static void eatWhitespace(String line, ParsePosition pp) { int cp = 0; int i; for (i = pp.getIndex(); i < line.length(); i += UTF16.getCharCount(cp)) { cp = UTF16.charAt(line, i); if (!com.ibm.icu.lang.UCharacter.isUWhiteSpace(cp)) { break; } } pp.setIndex(i); } }