cldrbug 339: fix cldr tests

X-SVN-Rev: 16716
This commit is contained in:
Mark Davis 2004-11-02 00:53:21 +00:00
parent 6faeb8893d
commit 95c8013cdc
3 changed files with 283 additions and 44 deletions

View File

@ -45,6 +45,7 @@ import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
@ -65,6 +66,7 @@ import com.ibm.icu.dev.tool.cldr.ICUResourceWriter.ResourceTable;
* TODO Get the data directly from the CLDR tree.
* @author medavis
*/
public class GenerateCldrTests {
static private PrintWriter log;
@ -75,7 +77,8 @@ public class GenerateCldrTests {
DESTDIR = 2,
LOGDIR = 3,
SOURCEDIR =4,
MATCH = 5;
MATCH = 5,
FULLY_RESOLVED = 6;
private static final UOption[] options = {
UOption.HELP_H(),
@ -84,6 +87,7 @@ public class GenerateCldrTests {
UOption.create("log", 'l', UOption.REQUIRES_ARG).setDefault("C:\\DATA\\GEN\\cldr\\test\\"),
UOption.SOURCEDIR().setDefault("C:\\ICU4C\\locale\\common\\"),
UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
UOption.create("fullyresolved", 'f', UOption.NO_ARG),
};
CldrCollations cldrCollations;
@ -254,7 +258,7 @@ public class GenerateCldrTests {
out.println("<!DOCTYPE ldml SYSTEM 'http://www.unicode.org/cldr/dtd/1.2/beta/cldrTest.dtd'>");
out.println("<!-- For information, see readme.html -->");
out.println(" <cldrTest version='1.2' base='" + locale + "'>");
out.println(" <!-- " + BagFormatter.toHTML.transliterate(
out.println(" <!-- " + BagFormatter.toXML.transliterate(
locale.getDisplayName(ULocale.ENGLISH) + " ["
+ locale.getDisplayName(locale))
+ "] -->");
@ -263,6 +267,7 @@ public class GenerateCldrTests {
generateItems(locale, collationLocales, CollationEquator, CollationShower);
out.println(" </cldrTest>");
out.close();
GenerateSidewaysView.generateBat(options[SOURCEDIR].value + "test" + File.separator, locale + ".xml", options[DESTDIR].value, locale + ".xml");
}
/*
@ -609,7 +614,7 @@ public class GenerateCldrTests {
}
tailored = createCaseClosure(tailored);
tailored = nfc(tailored);
System.out.println(tailored.toPattern(true));
//System.out.println(tailored.toPattern(true));
UnicodeSet exemplars = getExemplarSet(locale, UnicodeSet.CASE);
// add all the exemplars
@ -619,7 +624,7 @@ public class GenerateCldrTests {
exemplars = createCaseClosure(exemplars);
exemplars = nfc(exemplars);
System.out.println(exemplars.toPattern(true));
//System.out.println(exemplars.toPattern(true));
tailored.addAll(exemplars);
//UnicodeSet tailoredMinusHan = new UnicodeSet(tailored).removeAll(SKIP_COLLATION_SET);
if (!exemplars.containsAll(tailored)) {
@ -684,6 +689,7 @@ public class GenerateCldrTests {
for (int i = 0; i < files.length; ++i) {
String name = files[i].getName();
if (!name.endsWith(".xml")) continue;
if (name.startsWith("supplementalData")) continue;
String locale = name.substring(0,name.length()-4); // drop .xml
if (!locale.equals("root") && !m.reset(locale).matches()) continue;
s.add(locale);
@ -734,10 +740,15 @@ public class GenerateCldrTests {
}
void getInfo(String locale) {
//System.out.println(locale);
System.out.println("Getting info for: " + locale);
locales.add(new ULocale(locale));
// Document doc = LDMLUtilities.getFullyResolvedLDML(sourceDir, locale, false, false, false);
Document doc = LDMLUtilities.parse(sourceDir + locale + ".xml", false);
Document doc;
if (options[FULLY_RESOLVED].doesOccur) {
doc = LDMLUtilities.getFullyResolvedLDML(sourceDir, locale,
false, false, false);
} else {
doc = LDMLUtilities.parse(sourceDir + locale + ".xml", false);
}
Node node = LDMLUtilities.getNode(doc, "//ldml/characters/exemplarCharacters");
if (node == null) return;
if (isDraft(node)) System.out.println("Skipping draft: " + locale + ", " + getXPath(node));
@ -863,6 +874,7 @@ public class GenerateCldrTests {
}
return source;
}
static Transliterator fromHex = Transliterator.getInstance("hex-any");
private void getCollationRules(String locale) throws Exception {
System.out.println(locale);
@ -889,9 +901,18 @@ public class GenerateCldrTests {
} else
*/
if (foo.name.equals("Sequence")) {
String rules = foo.val;
RuleBasedCollator fixed = generateCollator(locale, current.name, foo.name, foo.val);
if (fixed != null) types_rules.put(current.name, fixed);
// remove the \ u's, because they blow up
String rules = fromHex.transliterate(foo.val);
RuleBasedCollator fixed = generateCollator(locale, current.name, foo.name, rules);
if (fixed != null) {
log.println("Rules for: " + locale + "," + current.name);
log.println(rules);
if (!rules.equals(foo.val)) {
log.println("Original Rules from Ram: ");
log.println(foo.val);
}
types_rules.put(current.name, fixed);
}
}
}
}

View File

@ -35,6 +35,7 @@ import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.ext.DeclHandler;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.XMLReader;
@ -62,7 +63,15 @@ import com.ibm.icu.util.UResourceBundle;
* by_type/X.html, where X is a type. X may be the concatenation of more than
* one element, where the file would otherwise be too large.
* @author medavis
*/public class GenerateSidewaysView {
*/
/*
Notes:
http://xml.apache.org/xerces2-j/faq-grammars.html#faq-3
http://developers.sun.com/dev/coolstuff/xml/readme.html
http://lists.xml.org/archives/xml-dev/200007/msg00284.html
http://java.sun.com/j2se/1.4.2/docs/api/org/xml/sax/DTDHandler.html
*/
public class GenerateSidewaysView {
// debug flags
static final boolean DEBUG = false;
static final boolean DEBUG2 = false;
@ -80,7 +89,8 @@ import com.ibm.icu.util.UResourceBundle;
MATCH = 4,
SKIP = 5,
TZADIR = 6,
NONVALIDATING = 7;
NONVALIDATING = 7,
SHOW_DTD = 8;
private static final String NEWLINE = "\n";
@ -93,6 +103,7 @@ import com.ibm.icu.util.UResourceBundle;
UOption.create("skip", 'z', UOption.REQUIRES_ARG).setDefault("zh_(C|S|HK|M).*"),
UOption.create("tzadir", 't', UOption.REQUIRES_ARG).setDefault("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"),
UOption.create("nonvalidating", 'n', UOption.NO_ARG),
UOption.create("dtd", 'w', UOption.NO_ARG),
};
private static String timeZoneAliasDir = null;
@ -120,7 +131,7 @@ import com.ibm.icu.util.UResourceBundle;
GenerateSidewaysView temp = getCLDR(baseName, !options[NONVALIDATING].doesOccur);
// if (baseName.equals("zh_TW")) baseName = "zh_Hant_TW";
// if (baseName.equals("root")) temp.addMissing();
if (options[SHOW_DTD].doesOccur) temp.writeDTDCheck();
temp.writeTo(options[DESTDIR].value, baseName);
generateBat(options[SOURCEDIR].value, baseName + ".xml", options[DESTDIR].value, baseName + ".xml");
sidewaysView.putData(temp.data, baseName);
@ -129,10 +140,18 @@ import com.ibm.icu.util.UResourceBundle;
sidewaysView.showCacheData();
} finally {
log.close();
System.out.println("Done");
}
}
static Collator DEFAULT_COLLATION = null;
/**
 * Runs the child-element ordering check on the declarations gathered by
 * DEFAULT_DECLHANDLER; the check reports its outcome to the log.
 * Called only when the SHOW_DTD ("dtd") option occurs on the command line.
 */
private void writeDTDCheck() {
DEFAULT_DECLHANDLER.checkData();
}
static Collator DEFAULT_COLLATION = null;
static final Set IGNOREABLE = new HashSet(Arrays.asList(new String[] {
"draft",
@ -195,6 +214,7 @@ import com.ibm.icu.util.UResourceBundle;
OrderedMap data = new OrderedMap();
MyContentHandler DEFAULT_HANDLER = new MyContentHandler();
MyDeclHandler DEFAULT_DECLHANDLER = new MyDeclHandler();
XMLReader xmlReader;
/*SAXParser SAX;
@ -280,6 +300,7 @@ import com.ibm.icu.util.UResourceBundle;
xmlReader.setContentHandler(DEFAULT_HANDLER);
xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler",DEFAULT_HANDLER);
if (options[SHOW_DTD].doesOccur) xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", DEFAULT_DECLHANDLER);
readFrom(options[SOURCEDIR].value, filename);
// walk through the map removing anything that is inherited from a parent.
@ -523,7 +544,7 @@ import com.ibm.icu.util.UResourceBundle;
if (path) {
return "[@" + name + "='" + BagFormatter.toHTML.transliterate(value) + "']";
} else {
return " " + name + "=\"" + BagFormatter.toHTML.transliterate(value) + "\"";
return " " + name + "=\"" + BagFormatter.toXML.transliterate(value) + "\"";
}
}
public int compareTo(Object o) {
@ -1671,6 +1692,147 @@ import com.ibm.icu.util.UResourceBundle;
}
}
/**
 * SAX {@link DeclHandler} that records, for every element declaration seen in
 * the DTD, the list of names appearing in that element's content model, and
 * that can afterwards check whether one global ordering of those names is
 * compatible with every recorded list (see {@link #checkData()}).
 * All output goes to the enclosing class's <code>log</code> writer.
 */
class MyDeclHandler implements DeclHandler {
    /** Element name (String) -> List of names found in its content model, in order of appearance. */
    Map element_childComparator = new TreeMap();

    /** When true, isAlwaysFirst logs each candidate it tries and the row that rejected it. */
    boolean showReason = false;

    /** Element names whose declarations elementDecl ignores entirely. */
    Set SKIP_LIST = new HashSet(Arrays.asList(new String[] {
        "collation", "base", "settings", "suppress_contractions", "optimize", "rules", "reset",
        "context", "p", "pc", "s", "sc", "t", "tc", "q", "qc", "i", "ic", "extend", "x"
    }));

    /** Sentinel returned by getFirst once every recorded list is empty. */
    Object DONE = new Object(); // marker

    /**
     * Verifies that the ordering of child names is consistent across all
     * recorded elements, by building a single ordering from the lists:
     * repeatedly pick a name that is never preceded by another name in any
     * list, append it to the ordering, and remove it from every list.
     * <p>
     * On success the ordering is logged, one numbered entry per line. On
     * failure the candidates are retried with reasons shown, then the partial
     * ordering and the remaining non-empty rows are logged.
     * <p>
     * Note: this consumes the recorded lists (names are removed from them as
     * they are placed), so it is meaningful only once per set of declarations.
     */
    public void checkData() {
        showReason = false;
        List orderingList = new ArrayList();
        while (true) {
            Object first = getFirst(orderingList);
            if (first == DONE) {
                log.println("Successful Ordering");
                int count = 0;
                for (Iterator it = orderingList.iterator(); it.hasNext();) {
                    // Tab between ordinal and name, matching the other
                    // listings in this class (previously printed "1name").
                    log.println(++count + "\t" + it.next());
                }
                break;
            }
            if (first != null) {
                orderingList.add(first);
            } else {
                // No candidate survived: rerun the search purely for its
                // diagnostic output, then dump what is left.
                showReason = true;
                getFirst(orderingList);
                log.println();
                log.println("Failed ordering. So far:");
                for (Iterator it = orderingList.iterator(); it.hasNext();) {
                    log.print("\t" + it.next());
                }
                log.println();
                log.println("Items:");
                for (Iterator it = element_childComparator.keySet().iterator(); it.hasNext();) {
                    showRow(it.next(), true);
                }
                log.println();
                break;
            }
        }
    }

    /**
     * Logs one row: the parent name followed by its tab-separated child list.
     * @param parent key into element_childComparator
     * @param skipEmpty if true, a parent whose list is empty produces no output
     */
    private void showRow(Object parent, boolean skipEmpty) {
        List items = (List) element_childComparator.get(parent);
        if (skipEmpty && items.size() == 0) return;
        log.print(parent);
        for (Iterator it2 = items.iterator(); it2.hasNext();) {
            log.print("\t" + it2.next());
        }
        log.println();
    }

    /**
     * Finds the next name for the global ordering: the head of some list that
     * is not at a later position in any other list. When one is found it is
     * removed from every list before being returned.
     * @param orderingList the ordering built so far (currently not consulted)
     * @return the chosen name, DONE if every list is empty, or null if no
     *         list head qualifies
     */
    private Object getFirst(List orderingList) {
        Set keys = element_childComparator.keySet();
        Set failures = new HashSet(); // heads already rejected in this pass; not re-tested
        boolean allZero = true;
        for (Iterator it = keys.iterator(); it.hasNext();) {
            List list = (List) element_childComparator.get(it.next());
            if (list.size() != 0) {
                allZero = false;
                Object possibleFirst = list.get(0);
                if (!failures.contains(possibleFirst) && isAlwaysFirst(possibleFirst)) {
                    // we survived the gauntlet. add to ordering list, remove from the mappings
                    removeEverywhere(possibleFirst);
                    return possibleFirst;
                } else {
                    failures.add(possibleFirst);
                }
            }
        }
        if (allZero) return DONE;
        return null;
    }

    /**
     * Removes the given name from every recorded child list.
     * @param possibleFirst the name that has just been placed in the ordering
     */
    private void removeEverywhere(Object possibleFirst) {
        for (Iterator it2 = element_childComparator.keySet().iterator(); it2.hasNext();) {
            List list2 = (List) element_childComparator.get(it2.next());
            list2.remove(possibleFirst);
        }
    }

    /**
     * Tests whether the candidate is at index 0 in every list that contains it.
     * When showReason is set, logs the candidate and the first row where it
     * appears at a later position.
     * @param possibleFirst candidate name
     * @return false if some list holds the candidate at an index greater than 0
     */
    private boolean isAlwaysFirst(Object possibleFirst) {
        if (showReason) log.println("Trying: " + possibleFirst);
        for (Iterator it2 = element_childComparator.keySet().iterator(); it2.hasNext();) {
            Object key = it2.next();
            List list2 = (List) element_childComparator.get(key);
            int pos = list2.indexOf(possibleFirst);
            if (pos > 0) {
                if (showReason) {
                    log.print("Failed at:\t");
                    showRow(key, false);
                }
                return false;
            }
        }
        return true;
    }

    /**
     * Records the names found in an element's content model.
     * The model string is split on every character that is not an ASCII
     * letter or digit; each distinct non-empty token is kept in order of
     * first appearance. Elements in SKIP_LIST are ignored.
     * <p>
     * refine later; right now, doesn't handle multiple elements well.
     * @param name the element being declared
     * @param model the content model string supplied by the parser
     */
    public void elementDecl(String name, String model) throws SAXException {
        if (SKIP_LIST.contains(name)) return;
        //log.println("Element\t" + name + "\t" + model);
        String[] list = model.split("[^A-Z0-9a-z]");
        List mc = new ArrayList();
        if (name.equals("currency")) {
            // Special case: seed the list so these three come first, in this order.
            mc.add("alias");
            mc.add("symbol");
            mc.add("pattern");
        }
        for (int i = 0; i < list.length; ++i) {
            if (list[i].length() == 0) continue;
            //log.print("\t" + list[i]);
            if (mc.contains(list[i])) {
                // Despite the wording, the token is a name from the content model.
                log.println("Duplicate attribute " + name + ", " + list[i]);
            } else {
                mc.add(list[i]);
            }
        }
        element_childComparator.put(name, mc);
        //log.println();
    }

    /** Attribute declarations are not used by the ordering check. */
    public void attributeDecl(String eName, String aName, String type, String mode, String value) throws SAXException {
        //log.println("Attribute\t" + eName + "\t" + aName + "\t" + type + "\t" + mode + "\t" + value);
    }

    /** Internal entity declarations are not used by the ordering check. */
    public void internalEntityDecl(String name, String value) throws SAXException {
        //log.println("Internal Entity\t" + name + "\t" + value);
    }

    /** External entity declarations are not used by the ordering check. */
    public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException {
        //log.println("External Entity\t" + name + "\t" + publicId + "\t" + systemId);
    }
}
class MyContentHandler implements ContentHandler, LexicalHandler {
ElementChain contextStack = new ElementChain();

View File

@ -11,10 +11,13 @@ package com.ibm.icu.dev.tool.cldr;
import java.io.File;
import java.io.PrintWriter;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.Iterator;
@ -27,11 +30,14 @@ import org.xml.sax.helpers.DefaultHandler;
import com.ibm.icu.util.TimeZone;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.Transliterator;
/**
* This is a file that runs the CLDR tests for ICU4J, to verify that ICU4J implements them
@ -41,31 +47,44 @@ import com.ibm.icu.text.SimpleDateFormat;
public class TestCldr {
static final boolean DEBUG = false;
ULocale uLocale = ULocale.ENGLISH;
Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
PrintWriter log;
//ULocale uLocale = ULocale.ENGLISH;
//Locale oLocale = Locale.ENGLISH; // TODO Drop once ICU4J has ULocale everywhere
static PrintWriter log;
SAXParser SAX;
private static final int
HELP1 = 0,
HELP2 = 1,
SOURCEDIR = 2,
LOGDIR = 3;
LOGDIR = 3,
MATCH = 4;
private static final UOption[] options = {
UOption.HELP_H(),
UOption.HELP_QUESTION_MARK(),
UOption.SOURCEDIR().setDefault("C:\\ICU4C\\locale\\common\\test\\"),
UOption.create("log", 'l', UOption.REQUIRES_ARG).setDefault("")
UOption.create("log", 'l', UOption.REQUIRES_ARG).setDefault(""),
UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
};
public static void main(String[] args) throws Exception {
UOption.parseArgs(args, options);
TestCldr x = new TestCldr();
x.test();
log = BagFormatter.openUTF8Writer(options[LOGDIR].value, "log.txt");
try {
TestCldr x = new TestCldr();
x.test();
} finally {
log.close();
System.out.println("Done");
}
}
public void test() throws Exception {
Set s = GenerateCldrTests.getMatchingXMLFiles(options[SOURCEDIR].value, options[MATCH].value);
for (Iterator it = s.iterator(); it.hasNext();) {
test((String) it.next());
}
/*
// test("hu");
File[] list = new File(options[SOURCEDIR].value).listFiles();
for (int i = 0; i < list.length; ++i) {
@ -73,22 +92,31 @@ public class TestCldr {
if (!name.endsWith(".xml")) continue;
test(name.substring(0,name.length()-4));
}
*/
}
public void test(String localeName) throws Exception {
System.out.println("Testing " + localeName);
uLocale = new ULocale(localeName);
oLocale = uLocale.toLocale();
//uLocale = new ULocale(localeName);
//oLocale = uLocale.toLocale();
File f = new File(options[SOURCEDIR].value + localeName + ".xml");
System.out.println("Testing " + f.getCanonicalPath());
log.println("Testing " + f.getCanonicalPath());
SAX.parse(f, DEFAULT_HANDLER);
}
/** Transliterator obtained for the "any-hex" ID; used by showString. */
static Transliterator toUnicode = Transliterator.getInstance("any-hex");

/**
 * Formats a string for failure messages: the text itself between guillemets,
 * followed in parentheses by its "any-hex" transliteration.
 * @param in the string to display
 * @return the decorated display form
 */
static public String showString(String in) {
    String hexForm = toUnicode.transliterate(in);
    return "\u00AB" + in + "\u00BB (" + hexForm + ")";
}
// ============ SAX Handler Infrastructure ============
abstract public class Handler {
Map settings = new TreeMap();
String name;
List currentLocales = new ArrayList();
int failures = 0;
void setName(String name) {
this.name = name;
}
@ -98,10 +126,18 @@ public class TestCldr {
}
void checkResult(String value) {
try {
handleResult(value);
for (int i = 0; i < currentLocales.size(); ++i) {
ULocale ul = (ULocale)currentLocales.get(i);
log.println(" Checking " + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")" + " for " + name);
handleResult(ul, value);
if (failures != 0) {
System.out.println("\tTotal Failures: " + failures + "\t" + ul + "(" + ul.getDisplayName(ULocale.ENGLISH) + ")");
failures = 0;
}
}
} catch (Exception e) {
logln("Exception with result: <" + value + ">");
e.printStackTrace();
e.printStackTrace(log);
}
}
public void logln(String message) {
@ -111,7 +147,7 @@ public class TestCldr {
String attributeValue = (String) settings.get(attributeName);
temp += " " + attributeName + "=<" + attributeValue + ">";
}
System.out.println(temp + "]");
log.println(temp + "]");
}
int lookupValue(Object x, Object[] list) {
for (int i = 0; i < list.length; ++i) {
@ -120,13 +156,30 @@ public class TestCldr {
logln("Unknown String: " + x);
return -1;
}
abstract void handleResult(String value) throws Exception;
abstract void handleResult(ULocale currentLocale, String value) throws Exception;
/**
 * Replaces the contents of <code>currentLocales</code> with one ULocale for
 * each non-empty, space-separated entry of the <code>locales</code> attribute.
 * <p>
 * The entries are obtained with String.split, which sizes its result to the
 * input, so the number of locales is not limited (the earlier fixed 50-slot
 * buffer could not hold longer lists).
 * @param attributes attributes of the test element being started.
 *        NOTE(review): if the "locales" attribute is absent this still fails
 *        with a NullPointerException, as before -- confirm the test DTD
 *        makes it required.
 */
public void setAttributes(Attributes attributes) {
    String localeList = attributes.getValue("locales");
    String[] localeNames = localeList.split(" ");
    currentLocales.clear();
    for (int i = 0; i < localeNames.length; ++i) {
        // consecutive or leading spaces yield empty entries; ignore them
        if (localeNames[i].length() == 0) continue;
        currentLocales.add(new ULocale(localeNames[i]));
    }
}
}
public Handler getHandler(String name) {
/**
 * Looks up the handler registered for a test element and primes it with the
 * element's attributes.
 * @param name the test type used as the key into RegisteredHandlers
 * @param attributes attributes of the element, passed to the handler's setAttributes
 * @return the registered handler, or null (after printing a notice) when
 *         no handler is registered under that name
 */
public Handler getHandler(String name, Attributes attributes) {
    if (DEBUG) {
        System.out.println("Creating Handler: " + name);
    }
    Handler registered = (Handler) RegisteredHandlers.get(name);
    if (registered != null) {
        registered.setAttributes(attributes);
        return registered;
    }
    System.out.println("Unexpected test type: " + name);
    return null;
}
@ -151,9 +204,10 @@ public class TestCldr {
// ============ Handler for Collation ============
{
addHandler("collation", new Handler() {
public void handleResult(String value) {
Collator col = Collator.getInstance(uLocale);
public void handleResult(ULocale currentLocale, String value) {
Collator col = Collator.getInstance(currentLocale);
String lastLine = "";
int count = 0;
for (int pos = 0; pos < value.length();) {
int nextPos = value.indexOf('\n', pos);
if (nextPos < 0)
@ -162,20 +216,22 @@ public class TestCldr {
if (line.length() != 0) { // HACK for SAX
int comp = col.compare(lastLine, line);
if (comp > 0) {
logln("Failure: <" + lastLine + "> should be leq <" + line + ">");
failures++;
logln("\tLine " + (count + 1) + "\tFailure: " + showString(lastLine) + " should be leq " + showString(line));
} else if (DEBUG) {
System.out.println("OK: " + line);
}
}
pos = nextPos + 1;
lastLine = line;
count++;
}
}
});
// ============ Handler for Numbers ============
addHandler("number", new Handler() {
public void handleResult(String result) {
public void handleResult(ULocale locale, String result) {
NumberFormat nf = null;
double v = Double.NaN;
for (Iterator it = settings.keySet().iterator(); it.hasNext();) {
@ -189,11 +245,11 @@ public class TestCldr {
// must be either numberType at this point
int index = lookupValue(attributeValue, NumberNames);
switch(index) {
case 0: nf = NumberFormat.getInstance(oLocale); break;
case 1: nf = NumberFormat.getIntegerInstance(oLocale); break;
case 2: nf = NumberFormat.getNumberInstance(oLocale); break;
case 3: nf = NumberFormat.getPercentInstance(oLocale); break;
case 4: nf = NumberFormat.getScientificInstance(oLocale); break;
case 0: nf = NumberFormat.getInstance(locale); break;
case 1: nf = NumberFormat.getIntegerInstance(locale); break;
case 2: nf = NumberFormat.getNumberInstance(locale); break;
case 3: nf = NumberFormat.getPercentInstance(locale); break;
case 4: nf = NumberFormat.getScientificInstance(locale); break;
}
String temp = nf.format(v).trim();
result = result.trim(); // HACK because of SAX
@ -207,7 +263,7 @@ public class TestCldr {
// ============ Handler for Dates ============
addHandler("date", new Handler() {
public void handleResult(String result) throws ParseException {
public void handleResult(ULocale locale, String result) throws ParseException {
int dateFormat = DateFormat.DEFAULT;
int timeFormat = DateFormat.DEFAULT;
Date date = new Date();
@ -228,9 +284,9 @@ public class TestCldr {
timeFormat = value;
}
DateFormat dt = dateFormat == -1 ? DateFormat.getTimeInstance(timeFormat, oLocale)
: timeFormat == -1 ? DateFormat.getDateInstance(dateFormat, oLocale)
: DateFormat.getDateTimeInstance(dateFormat, timeFormat, oLocale);
DateFormat dt = dateFormat == -1 ? DateFormat.getTimeInstance(timeFormat, locale)
: timeFormat == -1 ? DateFormat.getDateInstance(dateFormat, locale)
: DateFormat.getDateTimeInstance(dateFormat, timeFormat, locale);
dt.setTimeZone(utc);
String temp = dt.format(date).trim();
result = result.trim(); // HACK because of SAX
@ -276,7 +332,7 @@ public class TestCldr {
handler.set(attributes.getQName(i), attributes.getValue(i));
}
} else {
handler = getHandler(qName);
handler = getHandler(qName, attributes);
//handler.set("locale", uLocale.toString());
}
//if (DEBUG) System.out.println("startElement:\t" + contextStack);