ad417f752e
X-SVN-Rev: 17248
217 lines
8.6 KiB
Java
217 lines
8.6 KiB
Java
package com.ibm.text.UCD;
|
|
import java.io.BufferedReader;
|
|
import java.io.IOException;
|
|
import java.io.PrintWriter;
|
|
import java.text.ParseException;
|
|
import java.text.ParsePosition;
|
|
import java.util.Arrays;
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
import java.util.Locale;
|
|
|
|
import com.ibm.icu.dev.test.util.BagFormatter;
|
|
import com.ibm.icu.text.SymbolTable;
|
|
import com.ibm.icu.text.UTF16;
|
|
import com.ibm.icu.text.UnicodeMatcher;
|
|
import com.ibm.icu.text.UnicodeSet;
|
|
import com.ibm.text.utility.Utility;
|
|
|
|
public class TestUnicodeInvariants {
|
|
|
|
public static void main(String[] args) throws IOException {
|
|
testInvariants();
|
|
}
|
|
|
|
/**
|
|
* Chain together several SymbolTables.
|
|
* @author Davis
|
|
*/
|
|
static class ChainedSymbolTable implements SymbolTable {
|
|
// TODO: add accessors?
|
|
private List symbolTables;
|
|
/**
|
|
* Each SymbolTable is each accessed in order by the other methods,
|
|
* so the first in the list is accessed first, etc.
|
|
* @param symbolTables
|
|
*/
|
|
ChainedSymbolTable(SymbolTable[] symbolTables) {
|
|
this.symbolTables = Arrays.asList(symbolTables);
|
|
}
|
|
public char[] lookup(String s) {
|
|
for (Iterator it = symbolTables.iterator(); it.hasNext();) {
|
|
SymbolTable st = (SymbolTable) it.next();
|
|
char[] result = st.lookup(s);
|
|
if (result != null) return result;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public UnicodeMatcher lookupMatcher(int ch) {
|
|
for (Iterator it = symbolTables.iterator(); it.hasNext();) {
|
|
SymbolTable st = (SymbolTable) it.next();
|
|
UnicodeMatcher result = st.lookupMatcher(ch);
|
|
if (result != null) return result;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
// Warning: this depends on pos being left alone unless a string is returned!!
|
|
public String parseReference(String text, ParsePosition pos, int limit) {
|
|
for (Iterator it = symbolTables.iterator(); it.hasNext();) {
|
|
SymbolTable st = (SymbolTable) it.next();
|
|
String result = st.parseReference(text, pos, limit);
|
|
if (result != null) return result;
|
|
}
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
 * The characters that testInvariants accepts between the left and right set
 * of an invariant line. As interpreted by the switch in testInvariants:
 * '=' sets are equal; '<' or U+2282 left is a proper subset of right;
 * '>' or U+2283 left is a proper superset of right; U+2264 or U+2286 left is
 * a subset of right; U+2265 or U+2287 left is a superset of right;
 * '!' sets are disjoint; '?' sets overlap without either containing the other.
 * NOTE(review): '~' is in this set but has no case in that switch, so a line
 * using it reaches the default branch, which throws "Internal Error".
 */
static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\~ \\= \\! \\? \\< \\> \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");
|
|
|
|
public static void testInvariants() throws IOException {
|
|
String[][] variables = new String[100][2];
|
|
int variableCount = 0;
|
|
PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
|
|
out.write('\uFEFF'); // BOM
|
|
BufferedReader in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt");
|
|
BagFormatter bf = new BagFormatter();
|
|
bf.setUnicodePropertyFactory(ToolUnicodePropertySource.make(""));
|
|
BagFormatter bf2 = new BagFormatter();
|
|
bf2.setUnicodePropertyFactory(ToolUnicodePropertySource.make(""));
|
|
bf2.setMergeRanges(false);
|
|
ChainedSymbolTable st = new ChainedSymbolTable(new SymbolTable[] {
|
|
ToolUnicodePropertySource.make("4.0.0").getSymbolTable("\u00D7"),
|
|
ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
|
|
ParsePosition pp = new ParsePosition(0);
|
|
int parseErrorCount = 0;
|
|
int testFailureCount = 0;
|
|
while (true) {
|
|
String line = in.readLine();
|
|
if (line == null) break;
|
|
if (line.startsWith("\uFEFF")) line = line.substring(1);
|
|
out.println(line);
|
|
line = line.trim();
|
|
int pos = line.indexOf('#');
|
|
if (pos >= 0) line = line.substring(0,pos).trim();
|
|
if (line.length() == 0) continue;
|
|
|
|
// fix all the variables
|
|
String oldLine = line;
|
|
line = Utility.replace(line, variables, variableCount);
|
|
|
|
// detect variables
|
|
if (line.startsWith("Let")) {
|
|
int x = line.indexOf('=');
|
|
variables[variableCount][0] = line.substring(3,x).trim();
|
|
variables[variableCount][1] = line.substring(x+1).trim();
|
|
variableCount++;
|
|
if (false) System.out.println("Added variable: <" + variables[variableCount-1][0] + "><"
|
|
+ variables[variableCount-1][1] + ">");
|
|
continue;
|
|
}
|
|
|
|
// detect variables
|
|
if (line.startsWith("Show")) {
|
|
String part = line.substring(4).trim();
|
|
pp.setIndex(0);
|
|
UnicodeSet leftSet = new UnicodeSet(part, pp, st);
|
|
bf2.showSetNames(out, leftSet);
|
|
continue;
|
|
}
|
|
|
|
char relation = 0;
|
|
String rightSide = null;
|
|
String leftSide = null;
|
|
UnicodeSet leftSet = null;
|
|
UnicodeSet rightSet = null;
|
|
try {
|
|
pp.setIndex(0);
|
|
leftSet = new UnicodeSet(line, pp, st);
|
|
leftSide = line.substring(0,pp.getIndex());
|
|
eatWhitespace(line, pp);
|
|
relation = line.charAt(pp.getIndex());
|
|
if (!INVARIANT_RELATIONS.contains(relation)) {
|
|
throw new ParseException("Invalid relation, must be one of " + INVARIANT_RELATIONS.toPattern(false),
|
|
pp.getIndex());
|
|
}
|
|
pp.setIndex(pp.getIndex()+1); // skip char
|
|
eatWhitespace(line, pp);
|
|
int start = pp.getIndex();
|
|
rightSet = new UnicodeSet(line, pp, st);
|
|
rightSide = line.substring(start,pp.getIndex());
|
|
eatWhitespace(line, pp);
|
|
if (line.length() != pp.getIndex()) {
|
|
throw new ParseException("Extra characters at end", pp.getIndex());
|
|
}
|
|
} catch (ParseException e) {
|
|
out.println("PARSE ERROR:\t" + line.substring(0,e.getErrorOffset())
|
|
+ "<@>" + line.substring(e.getErrorOffset()));
|
|
out.println();
|
|
out.println("**** START Error Info ****");
|
|
out.println(e.getMessage());
|
|
out.println("**** END Error Info ****");
|
|
out.println();
|
|
parseErrorCount++;
|
|
continue;
|
|
} catch (IllegalArgumentException e) {
|
|
out.println("PARSE ERROR:\t" + line);
|
|
out.println();
|
|
out.println("**** START Error Info ****");
|
|
out.println(e.getMessage());
|
|
out.println("**** END Error Info ****");
|
|
out.println();
|
|
parseErrorCount++;
|
|
continue;
|
|
}
|
|
|
|
boolean ok = true;
|
|
switch(relation) {
|
|
case '=': ok = leftSet.equals(rightSet); break;
|
|
case '<': case '\u2282': ok = rightSet.containsAll(leftSet) && !leftSet.equals(rightSet); break;
|
|
case '>': case '\u2283': ok = leftSet.containsAll(rightSet) && !leftSet.equals(rightSet); break;
|
|
case '\u2264': case '\u2286': ok = rightSet.containsAll(leftSet); break;
|
|
case '\u2265': case '\u2287': ok = leftSet.containsAll(rightSet); break;
|
|
case '!': ok = leftSet.containsNone(rightSet); break;
|
|
case '?': ok = !leftSet.equals(rightSet)
|
|
&& !leftSet.containsAll(rightSet)
|
|
&& !rightSet.containsAll(leftSet)
|
|
&& !leftSet.containsNone(rightSet);
|
|
break;
|
|
default: throw new IllegalArgumentException("Internal Error");
|
|
}
|
|
if (ok) continue;
|
|
out.println();
|
|
out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH));
|
|
out.println("**** START Error Info ****");
|
|
bf.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet);
|
|
out.println("**** END Error Info ****");
|
|
out.println();
|
|
testFailureCount++;
|
|
}
|
|
out.println();
|
|
out.println("**** SUMMARY ****");
|
|
out.println();
|
|
out.println("ParseErrorCount=" + parseErrorCount);
|
|
out.println("TestFailureCount=" + testFailureCount);
|
|
out.close();
|
|
System.out.println("ParseErrorCount=" + parseErrorCount);
|
|
System.out.println("TestFailureCount=" + testFailureCount);
|
|
}
|
|
|
|
/**
|
|
* @param line
|
|
* @param pp
|
|
*/
|
|
private static void eatWhitespace(String line, ParsePosition pp) {
|
|
int cp = 0;
|
|
int i;
|
|
for (i = pp.getIndex(); i < line.length(); i += UTF16.getCharCount(cp)) {
|
|
cp = UTF16.charAt(line, i);
|
|
if (!com.ibm.icu.lang.UCharacter.isUWhiteSpace(cp)) {
|
|
break;
|
|
}
|
|
}
|
|
pp.setIndex(i);
|
|
}
|
|
}
|