scuffed-code/tools/unicodetools/com/ibm/text/UCD/TestUnicodeInvariants.java
Mark Davis 3daf3898fb ICU-0 updated for 4.1
X-SVN-Rev: 16940
2004-12-11 06:03:10 +00:00

204 lines
8.1 KiB
Java

package com.ibm.text.UCD;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.ParseException;
import java.text.ParsePosition;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeMatcher;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.text.utility.Utility;
public class TestUnicodeInvariants {
/**
 * Command-line entry point; simply runs {@link #testInvariants()}.
 * @param args command-line arguments (not used)
 * @throws IOException if testInvariants fails with an I/O error
 */
public static void main(String[] args) throws IOException {
    TestUnicodeInvariants.testInvariants();
}
/**
 * Chains together several SymbolTables: every query is forwarded to each
 * table in turn, and the first non-null answer wins.
 * @author Davis
 */
static class ChainedSymbolTable implements SymbolTable {
    // TODO: add accessors?
    private final List symbolTables;

    /**
     * Each SymbolTable is accessed in order by the other methods,
     * so the first in the list is accessed first, etc.
     * @param symbolTables the tables to consult, in priority order; the array
     * is copied, so later changes to it do not affect this object
     */
    ChainedSymbolTable(SymbolTable[] symbolTables) {
        // Arrays.asList is only a view of its argument, so wrap a private copy.
        this.symbolTables = Arrays.asList((SymbolTable[]) symbolTables.clone());
    }

    /**
     * @param s the name to look up
     * @return the first non-null result of lookup(s) among the chained tables,
     * or null if every table returns null
     */
    public char[] lookup(String s) {
        for (Iterator it = symbolTables.iterator(); it.hasNext();) {
            SymbolTable st = (SymbolTable) it.next();
            char[] result = st.lookup(s);
            if (result != null) return result;
        }
        return null;
    }

    /**
     * @param ch the character to look up
     * @return the first non-null result of lookupMatcher(ch) among the chained
     * tables, or null if every table returns null
     */
    public UnicodeMatcher lookupMatcher(int ch) {
        for (Iterator it = symbolTables.iterator(); it.hasNext();) {
            SymbolTable st = (SymbolTable) it.next();
            UnicodeMatcher result = st.lookupMatcher(ch);
            if (result != null) return result;
        }
        return null;
    }

    /**
     * Asks each chained table in turn to parse a reference at pos.
     * @param text the text being parsed
     * @param pos the position to parse from; left at its original index
     * whenever null is returned
     * @param limit passed through unchanged to each table
     * @return the first non-null result, or null if no table recognizes a reference
     */
    public String parseReference(String text, ParsePosition pos, int limit) {
        // A table that fails is supposed to leave pos alone, but do not rely on
        // that: restore the index so the next table (and the caller, if all
        // fail) sees the original position.
        final int start = pos.getIndex();
        for (Iterator it = symbolTables.iterator(); it.hasNext();) {
            SymbolTable st = (SymbolTable) it.next();
            String result = st.parseReference(text, pos, limit);
            if (result != null) return result;
            pos.setIndex(start);
        }
        return null;
    }
}
/**
 * The characters that testInvariants accepts as the relation between the left
 * and right sets of an invariant line; any other character there is reported
 * as an "Invalid relation" parse error.
 * NOTE(review): '~' is listed here, but testInvariants has no case for it when
 * evaluating the relation, so a '~' line ends in the "Internal Error"
 * IllegalArgumentException. Confirm whether '~' should be implemented or removed.
 */
static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\~ \\= \\! \\? \\< \\> \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");
/**
 * Reads invariants from UnicodeInvariants.txt, checks each one, and writes a
 * report to UnicodeInvariantResults.txt in UCD_Types.GEN_DIR.
 * <p>
 * Every input line is echoed to the report. Text from '#' to the end of a line
 * is a comment, and blank lines are skipped. A line starting with "Let" has the
 * form <code>Let name = value</code> and defines a variable that is substituted
 * into all later lines. Any other line has the form
 * <code>leftSet relation rightSet</code>, where both sets are UnicodeSet
 * patterns and relation is one character from INVARIANT_RELATIONS.
 * <p>
 * Lines that cannot be parsed are reported as "PARSE ERROR"; invariants that do
 * not hold are reported as "FALSE" together with the set differences. Both
 * counts are written to the report and to System.out.
 * @throws IOException if the input cannot be read or a file cannot be opened
 * @throws IllegalArgumentException if a relation passes validation but has no
 * test defined for it
 */
public static void testInvariants() throws IOException {
    String[][] variables = new String[100][2];
    int variableCount = 0;
    int parseErrorCount = 0;
    int testFailureCount = 0;

    PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
    BufferedReader in = null;
    try {
        out.write('\uFEFF'); // BOM
        in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt");
        BagFormatter bf = new BagFormatter();
        ChainedSymbolTable st = new ChainedSymbolTable(new SymbolTable[] {
            ToolUnicodePropertySource.make("4.0.0").getSymbolTable("\u00D7"),
            ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
        ParsePosition pp = new ParsePosition(0);

        while (true) {
            String line = in.readLine();
            if (line == null) break;
            if (line.startsWith("\uFEFF")) line = line.substring(1);
            out.println(line);
            line = line.trim();
            int pos = line.indexOf('#');
            if (pos >= 0) line = line.substring(0,pos).trim();
            if (line.length() == 0) continue;

            // fix all the variables
            line = Utility.replace(line, variables, variableCount);

            // detect variables
            if (line.startsWith("Let")) {
                int x = line.indexOf('=');
                if (x < 0) {
                    // Without '=' there is nothing to split on; report instead of crashing.
                    reportParseError(out, "PARSE ERROR:\t" + line,
                        "Variable definition is missing '='");
                    parseErrorCount++;
                    continue;
                }
                if (variableCount == variables.length) {
                    // Grow the table rather than overflowing the fixed-size array.
                    String[][] larger = new String[variables.length * 2][2];
                    System.arraycopy(variables, 0, larger, 0, variableCount);
                    variables = larger;
                }
                variables[variableCount][0] = line.substring(3,x).trim();
                variables[variableCount][1] = line.substring(x+1).trim();
                variableCount++;
                continue;
            }

            char relation = 0;
            String rightSide = null;
            String leftSide = null;
            UnicodeSet leftSet = null;
            UnicodeSet rightSet = null;
            try {
                pp.setIndex(0);
                leftSet = new UnicodeSet(line, pp, st);
                leftSide = line.substring(0,pp.getIndex());
                eatWhitespace(line, pp);
                if (pp.getIndex() >= line.length()) {
                    // The line ends after the left set; charAt below would throw.
                    throw new ParseException("Missing relation, must be one of "
                        + INVARIANT_RELATIONS.toPattern(false), pp.getIndex());
                }
                relation = line.charAt(pp.getIndex());
                if (!INVARIANT_RELATIONS.contains(relation)) {
                    throw new ParseException("Invalid relation, must be one of " + INVARIANT_RELATIONS.toPattern(false),
                        pp.getIndex());
                }
                pp.setIndex(pp.getIndex()+1); // skip char
                eatWhitespace(line, pp);
                int start = pp.getIndex();
                if (start >= line.length()) {
                    throw new ParseException("Missing set after relation", start);
                }
                rightSet = new UnicodeSet(line, pp, st);
                rightSide = line.substring(start,pp.getIndex());
                eatWhitespace(line, pp);
                if (line.length() != pp.getIndex()) {
                    throw new ParseException("Extra characters at end", pp.getIndex());
                }
            } catch (ParseException e) {
                // Mark the error position inside the echoed line with "<@>".
                reportParseError(out, "PARSE ERROR:\t" + line.substring(0,e.getErrorOffset())
                    + "<@>" + line.substring(e.getErrorOffset()), e.getMessage());
                parseErrorCount++;
                continue;
            } catch (IllegalArgumentException e) {
                reportParseError(out, "PARSE ERROR:\t" + line, e.getMessage());
                parseErrorCount++;
                continue;
            }

            boolean ok = relationHolds(relation, leftSet, rightSet);
            if (ok) continue;
            out.println();
            out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH));
            out.println("**** START Error Info ****");
            bf.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet);
            out.println("**** END Error Info ****");
            out.println();
            testFailureCount++;
        }
        out.println();
        out.println("**** SUMMARY ****");
        out.println();
        out.println("ParseErrorCount=" + parseErrorCount);
        out.println("TestFailureCount=" + testFailureCount);
    } finally {
        // Always release both files, even when a line aborts the run.
        out.close();
        if (in != null) in.close();
    }
    System.out.println("ParseErrorCount=" + parseErrorCount);
    System.out.println("TestFailureCount=" + testFailureCount);
}

/**
 * Writes one parse-error entry to the report: the header line followed by the
 * message wrapped in START/END markers.
 */
private static void reportParseError(PrintWriter out, String header, String message) {
    out.println(header);
    out.println();
    out.println("**** START Error Info ****");
    out.println(message);
    out.println("**** END Error Info ****");
    out.println();
}

/**
 * Evaluates a single relation between two sets.
 * @return true if leftSet stands in the given relation to rightSet
 * @throws IllegalArgumentException if no test is defined for the relation
 */
private static boolean relationHolds(char relation, UnicodeSet leftSet, UnicodeSet rightSet) {
    switch (relation) {
    // equal
    case '=':
        return leftSet.equals(rightSet);
    // proper subset
    case '<': case '\u2282':
        return rightSet.containsAll(leftSet) && !leftSet.equals(rightSet);
    // proper superset
    case '>': case '\u2283':
        return leftSet.containsAll(rightSet) && !leftSet.equals(rightSet);
    // subset or equal
    case '\u2264': case '\u2286':
        return rightSet.containsAll(leftSet);
    // superset or equal
    case '\u2265': case '\u2287':
        return leftSet.containsAll(rightSet);
    // disjoint
    case '!':
        return leftSet.containsNone(rightSet);
    // overlapping, but neither contains the other
    case '?':
        return !leftSet.equals(rightSet)
            && !leftSet.containsAll(rightSet)
            && !rightSet.containsAll(leftSet)
            && !leftSet.containsNone(rightSet);
    default:
        throw new IllegalArgumentException("Internal Error: no test defined for relation '" + relation + "'");
    }
}
/**
 * Advances pp past any run of white space (as judged by
 * UCharacter.isUWhiteSpace) that starts at its current index. The index
 * ends on the first non-white-space code point, or at the end of the line.
 * @param line the text being scanned
 * @param pp the position to advance; its index is updated in place
 */
private static void eatWhitespace(String line, ParsePosition pp) {
    int index = pp.getIndex();
    while (index < line.length()) {
        int codePoint = UTF16.charAt(line, index);
        if (!com.ibm.icu.lang.UCharacter.isUWhiteSpace(codePoint)) {
            break;
        }
        // Step by one or two chars, depending on whether this is a supplementary code point.
        index += UTF16.getCharCount(codePoint);
    }
    pp.setIndex(index);
}
}