ICU-1436 beefed up test & chart pgm. didn't add all indics yet, though

X-SVN-Rev: 6608
This commit is contained in:
Mark Davis 2001-11-03 05:44:33 +00:00
parent 5d99d95fa5
commit 5102c9e434
4 changed files with 218 additions and 30 deletions

View File

@ -15,7 +15,13 @@ public class RoundTripTest extends TestFmwk {
public static void main(String[] args) throws Exception {
new RoundTripTest().run(args);
}
/*
public void TestSingle() throws IOException, ParseException {
Transliterator t = Transliterator.getInstance("Latin-Greek");
String s = t.transliterate("\u0101\u0069");
}
*/
public void TestHiragana() throws IOException, ParseException {
new Test("Latin-Hiragana",
TestUtility.LATIN_SCRIPT, TestUtility.HIRAGANA_SCRIPT)
@ -97,6 +103,12 @@ public class RoundTripTest extends TestFmwk {
.test(null, "[:Devanagari:]", null, this, new Legal());
}
/**
 * Round-trip check for the transliterator registered as "Tamil-DEVANAGARI".
 * The source side is limited to {@code [:tamil:]} and the target side to
 * {@code [:Devanagari:]}; no round-trip exclusions are supplied, and the
 * default {@link Legal} filter is used.
 *
 * @throws IOException    propagated from the underlying test run
 * @throws ParseException propagated from the underlying test run
 */
public void TestDevanagariTamil() throws IOException, ParseException {
    Test roundTrip = new Test("Tamil-DEVANAGARI",
            TestUtility.TAMIL_SCRIPT, TestUtility.DEVANAGARI_SCRIPT);
    roundTrip.test("[:tamil:]", "[:Devanagari:]", null, this, new Legal());
}
/**
 * Filter deciding whether a source string counts as legal input.
 * This base implementation accepts every string.
 */
public static class Legal {
// Always true here: no source string is rejected by the default filter.
public boolean is(String sourceString) {return true;}
}
@ -184,6 +196,7 @@ public class RoundTripTest extends TestFmwk {
UnicodeSet roundtripExclusions;
TestLog log;
Legal legalSource;
UnicodeSet badCharacters;
/*
* create a test for the given script transliterator.
@ -215,6 +228,15 @@ public class RoundTripTest extends TestFmwk {
return false;
}
/**
 * Reports whether at least one code point of {@code a} is a member of {@code set}.
 *
 * @param set the set to test membership against
 * @param a   the string whose code points are examined
 * @return {@code true} as soon as a contained code point is found,
 *         {@code false} if none is found (including when {@code a} is empty)
 */
public boolean includesSome(UnicodeSet set, String a) {
    int index = 0;
    while (index < a.length()) {
        int codePoint = UTF16.charAt(a, index);
        if (set.contains(codePoint)) {
            return true;
        }
        // step over every code unit of the code point just examined
        index += UTF16.getCharCount(codePoint);
    }
    return false;
}
public static boolean isCamel(String a) {
//System.out.println("CamelTest");
// see if string is of the form aB; e.g. lower, then upper or title
@ -263,6 +285,8 @@ public class RoundTripTest extends TestFmwk {
log.logln(Utility.escape("Source: " + this.sourceRange));
log.logln(Utility.escape("Target: " + this.targetRange));
log.logln(Utility.escape("Exclude: " + this.roundtripExclusions));
badCharacters = new UnicodeSet("[:other:]");
// make a UTF-8 output file we can read with a browser
@ -310,10 +334,16 @@ public class RoundTripTest extends TestFmwk {
if (TestUtility.isUnassigned(c) ||
!isSource(c)) continue;
String cs = String.valueOf(c);
String targ = sourceToTarget.transliterate(String.valueOf(cs));
if (!isReceivingTarget(targ)) {
String targ = sourceToTarget.transliterate(cs);
if (!isReceivingTarget(targ) || includesSome(badCharacters, targ)) {
logWrongScript("Source-Target", cs, targ);
failSourceTarg.set(c);
} else {
String cs2 = Normalizer.normalize(cs, Normalizer.DECOMP, 0);
String targ2 = sourceToTarget.transliterate(cs2);
if (!targ.equals(targ2)) {
logNotCanonical("Source-Target", cs, targ, targ2);
}
}
}
@ -331,10 +361,16 @@ public class RoundTripTest extends TestFmwk {
String cs = String.valueOf(c) + d;
String targ = sourceToTarget.transliterate(cs);
if (!isReceivingTarget(targ)) {
if (!isReceivingTarget(targ) || includesSome(badCharacters, targ)) {
logWrongScript("Source-Target", cs, targ);
} else {
String cs2 = Normalizer.normalize(cs, Normalizer.DECOMP, 0);
String targ2 = sourceToTarget.transliterate(cs2);
if (!targ.equals(targ2)) {
logNotCanonical("Source-Target", cs, targ, targ2);
}
}
}
}
log.logln("Checking that target characters convert to source and back - Singles");
@ -348,12 +384,18 @@ public class RoundTripTest extends TestFmwk {
String cs = String.valueOf(c);
String targ = targetToSource.transliterate(cs);
String reverse = sourceToTarget.transliterate(targ);
if (!isReceivingSource(targ)) {
if (!isReceivingSource(targ) || includesSome(badCharacters, targ)) {
logWrongScript("Target-Source", cs, targ);
failTargSource.set(c);
} else if (!isSame(cs, reverse) && !roundtripExclusions.contains(c)) {
logRoundTripFailure(cs, targ, reverse);
failRound.set(c);
} else {
String targ2 = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
String reverse2 = sourceToTarget.transliterate(targ2);
if (!reverse.equals(reverse2)) {
logNotCanonical("Target-Source", cs, targ, targ2);
}
}
}
@ -375,11 +417,18 @@ public class RoundTripTest extends TestFmwk {
String cs = buf.toString();
String targ = targetToSource.transliterate(cs);
String reverse = sourceToTarget.transliterate(targ);
if (!isReceivingSource(targ) && !failTargSource.get(c) && !failTargSource.get(d)) {
if (!isReceivingSource(targ) && !failTargSource.get(c) && !failTargSource.get(d)
|| includesSome(badCharacters, targ)) {
logWrongScript("Target-Source", cs, targ);
} else if (!isSame(cs, reverse) && !failRound.get(c) && !failRound.get(d)
&& !roundtripExclusions.contains(c) && !roundtripExclusions.contains(d)) {
logRoundTripFailure(cs, targ, reverse);
} else {
String targ2 = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
String reverse2 = sourceToTarget.transliterate(targ2);
if (!reverse.equals(reverse2)) {
logNotCanonical("Target-Source", cs, targ, targ2);
}
}
}
}
@ -398,6 +447,20 @@ public class RoundTripTest extends TestFmwk {
);
}
/**
 * Writes a canonical-equivalence failure line to the HTML report.
 * Each string is printed followed by its hex dump in parentheses.
 *
 * @param label direction label placed after the "Fail (can.equiv)" prefix
 * @param from  the string that was transliterated
 * @param to    the result obtained for {@code from}
 * @param toCan the alternate string reported next to {@code to}; callers pass
 *              the value derived via canonical decomposition
 * @throws TestTruncated once the running error count reaches the error limit
 */
final void logNotCanonical(String label, String from, String to, String toCan) {
    // Count this failure first; abort the run when the limit is reached.
    if (++errorCount >= errorLimit) {
        throw new TestTruncated("Test truncated; too many failures");
    }
    out.println("<br>Fail (can.equiv)" + label + ": " +
        from + " (" +
        TestUtility.hex(from) + ") => " +
        to + " (" +
        TestUtility.hex(to) + ")" +
        // separator so the second string is not glued to the closing parenthesis
        " -- " +
        toCan + " (" +
        // hex dump of toCan itself (previously the dump of 'to' was repeated here)
        TestUtility.hex(toCan) + ")"
    );
}
final void logRoundTripFailure(String from, String to, String back) {
if (!legalSource.is(from)) return; // skip illegals

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java,v $
* $Date: 2001/11/02 23:49:39 $
* $Revision: 1.3 $
* $Date: 2001/11/03 05:44:32 $
* $Revision: 1.4 $
*
*****************************************************************************************
*/
@ -22,6 +22,7 @@ import java.io.*;
public class WriteCharts {
public static void main(String[] args) throws IOException {
String testSet = "";
if (args.length == 0) args = all;
for (int i = 0; i < args.length; ++i) {
// Enumeration enum = Transliterator.getAvailableIDs();
if (args[i].startsWith("[")) {
@ -33,6 +34,13 @@ public class WriteCharts {
}
}
// Transliterator IDs processed by default: main() substitutes this array
// for its arguments when it is invoked with no arguments.
static final String[] all = {
"Cyrillic-Latin", "Greek-Latin",
"el-Latin",
"Devanagari-Tamil", "Devanagari-Latin",
"Katakana-Latin", "Hiragana-Latin", "Hangul-Latin"
};
public static void print(String testSet, String rawId) throws IOException {
Transliterator t = Transliterator.getInstance(rawId);
String id = t.getID();
@ -70,12 +78,14 @@ public class WriteCharts {
Transliterator inverse = t.getInverse();
Transliterator hex = Transliterator.getInstance("Any-Hex");
// iterate through script
System.out.println("Transliterating " + sourceSet.toPattern(true)
+ " with " + Transliterator.getDisplayName(id));
UnicodeSet leftOverSet = new UnicodeSet(targetSet);
UnicodeSet privateUse = new UnicodeSet("[:private use:]");
Map map = new TreeMap();
@ -99,11 +109,15 @@ public class WriteCharts {
} else if (!ss.equals(rt)) {
group |= 4;
}
if ((group & 0x7F) != 0) flag = "</td><td>" + hex.transliterate(ss) + "; " + hex.transliterate(ts) + "; " + hex.transliterate(rt);
if (containsSome(privateUse, ts) || containsSome(privateUse, rt)) {
group |= 16;
}
map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + ss,
"<tr><td>" + ss + "</td><td>" + ts + "</td><td>" + rt + "</td><td>" + flag + "</td></tr>" );
"<tr><td>" + ss + "<br><tt>" + hex.transliterate(ss) + "</tt></td><td>"
+ ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
+ rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
}
}
@ -124,10 +138,13 @@ public class WriteCharts {
if (!isIn(rt, sourceSet)) {
group |= 8;
}
if ((group & 0x7F) != 0) flag = "</td><td>" + hex.transliterate(ts) + "; " + hex.transliterate(rt);
if (containsSome(privateUse, rt)) {
group |= 16;
}
map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0)) + ts,
"<tr><td>-</td><td>" + ts + "</td><td>" + rt + flag + "</td></tr>");
"<tr><td>-</td><td>" + ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
+ rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>");
}
}
@ -145,10 +162,12 @@ public class WriteCharts {
out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
out.println("<HTML><HEAD>");
out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
out.println("<link rel='stylesheet' href='http://www.unicode.org/charts/uca/charts.css' type='text/css'>");
out.println("<BODY>");
String tableHeader = "<p><table border='1'><tr><th>Source</th><th>Target</th><th>Return</th></tr>";
String tableFooter = "</table></p>";
out.println("<h1>Testing Round Trip</h1>");
out.println("<h1>Round Trip</h1>");
out.println(tableHeader);
Iterator it = map.keySet().iterator();
@ -165,6 +184,7 @@ public class WriteCharts {
String title = "";
if ((group & 0x80) != 0) out.println("<hr><h1>Completeness</h1>");
else out.println("<hr><h1>Round Trip</h1>");
if ((group & 16) != 0) out.println("<h2>Errors: Contains Private Use Characters</h2>");
if ((group & 8) != 0) out.println("<h2>Possible Errors: Return not in Source Set</h2>");
if ((group & 4) != 0) out.println("<h2>Errors: Return not equal to Source</h2>");
if ((group & 2) != 0) out.println("<h2>Errors: Return not in Source Set</h2>");
@ -196,5 +216,16 @@ public class WriteCharts {
return true;
}
/**
 * Tests whether a string contains at least one code point from a set.
 *
 * @param set the set to test membership against
 * @param s   the string whose code points are examined
 * @return {@code true} if any code point of {@code s} is in {@code set},
 *         {@code false} otherwise (including when {@code s} is empty)
 */
public static boolean containsSome(UnicodeSet set, String s) {
    int cp;
    // Advance by the width of the code point just read; the step must be
    // computed from cp, not from the string index i.
    for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
        cp = UTF16.charAt(s, i);
        if (set.contains(cp)) return true;
    }
    return false;
}
}

View File

@ -15,7 +15,13 @@ public class RoundTripTest extends TestFmwk {
public static void main(String[] args) throws Exception {
new RoundTripTest().run(args);
}
/*
public void TestSingle() throws IOException, ParseException {
Transliterator t = Transliterator.getInstance("Latin-Greek");
String s = t.transliterate("\u0101\u0069");
}
*/
public void TestHiragana() throws IOException, ParseException {
new Test("Latin-Hiragana",
TestUtility.LATIN_SCRIPT, TestUtility.HIRAGANA_SCRIPT)
@ -97,6 +103,12 @@ public class RoundTripTest extends TestFmwk {
.test(null, "[:Devanagari:]", null, this, new Legal());
}
/**
 * Round-trip check for the transliterator registered as "Tamil-DEVANAGARI".
 * The source side is limited to {@code [:tamil:]} and the target side to
 * {@code [:Devanagari:]}; no round-trip exclusions are supplied, and the
 * default {@link Legal} filter is used.
 *
 * @throws IOException    propagated from the underlying test run
 * @throws ParseException propagated from the underlying test run
 */
public void TestDevanagariTamil() throws IOException, ParseException {
    Test roundTrip = new Test("Tamil-DEVANAGARI",
            TestUtility.TAMIL_SCRIPT, TestUtility.DEVANAGARI_SCRIPT);
    roundTrip.test("[:tamil:]", "[:Devanagari:]", null, this, new Legal());
}
/**
 * Filter deciding whether a source string counts as legal input.
 * This base implementation accepts every string.
 */
public static class Legal {
// Always true here: no source string is rejected by the default filter.
public boolean is(String sourceString) {return true;}
}
@ -184,6 +196,7 @@ public class RoundTripTest extends TestFmwk {
UnicodeSet roundtripExclusions;
TestLog log;
Legal legalSource;
UnicodeSet badCharacters;
/*
* create a test for the given script transliterator.
@ -215,6 +228,15 @@ public class RoundTripTest extends TestFmwk {
return false;
}
/**
 * Reports whether at least one code point of {@code a} is a member of {@code set}.
 *
 * @param set the set to test membership against
 * @param a   the string whose code points are examined
 * @return {@code true} as soon as a contained code point is found,
 *         {@code false} if none is found (including when {@code a} is empty)
 */
public boolean includesSome(UnicodeSet set, String a) {
    int index = 0;
    while (index < a.length()) {
        int codePoint = UTF16.charAt(a, index);
        if (set.contains(codePoint)) {
            return true;
        }
        // step over every code unit of the code point just examined
        index += UTF16.getCharCount(codePoint);
    }
    return false;
}
public static boolean isCamel(String a) {
//System.out.println("CamelTest");
// see if string is of the form aB; e.g. lower, then upper or title
@ -263,6 +285,8 @@ public class RoundTripTest extends TestFmwk {
log.logln(Utility.escape("Source: " + this.sourceRange));
log.logln(Utility.escape("Target: " + this.targetRange));
log.logln(Utility.escape("Exclude: " + this.roundtripExclusions));
badCharacters = new UnicodeSet("[:other:]");
// make a UTF-8 output file we can read with a browser
@ -310,10 +334,16 @@ public class RoundTripTest extends TestFmwk {
if (TestUtility.isUnassigned(c) ||
!isSource(c)) continue;
String cs = String.valueOf(c);
String targ = sourceToTarget.transliterate(String.valueOf(cs));
if (!isReceivingTarget(targ)) {
String targ = sourceToTarget.transliterate(cs);
if (!isReceivingTarget(targ) || includesSome(badCharacters, targ)) {
logWrongScript("Source-Target", cs, targ);
failSourceTarg.set(c);
} else {
String cs2 = Normalizer.normalize(cs, Normalizer.DECOMP, 0);
String targ2 = sourceToTarget.transliterate(cs2);
if (!targ.equals(targ2)) {
logNotCanonical("Source-Target", cs, targ, targ2);
}
}
}
@ -331,10 +361,16 @@ public class RoundTripTest extends TestFmwk {
String cs = String.valueOf(c) + d;
String targ = sourceToTarget.transliterate(cs);
if (!isReceivingTarget(targ)) {
if (!isReceivingTarget(targ) || includesSome(badCharacters, targ)) {
logWrongScript("Source-Target", cs, targ);
} else {
String cs2 = Normalizer.normalize(cs, Normalizer.DECOMP, 0);
String targ2 = sourceToTarget.transliterate(cs2);
if (!targ.equals(targ2)) {
logNotCanonical("Source-Target", cs, targ, targ2);
}
}
}
}
log.logln("Checking that target characters convert to source and back - Singles");
@ -348,12 +384,18 @@ public class RoundTripTest extends TestFmwk {
String cs = String.valueOf(c);
String targ = targetToSource.transliterate(cs);
String reverse = sourceToTarget.transliterate(targ);
if (!isReceivingSource(targ)) {
if (!isReceivingSource(targ) || includesSome(badCharacters, targ)) {
logWrongScript("Target-Source", cs, targ);
failTargSource.set(c);
} else if (!isSame(cs, reverse) && !roundtripExclusions.contains(c)) {
logRoundTripFailure(cs, targ, reverse);
failRound.set(c);
} else {
String targ2 = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
String reverse2 = sourceToTarget.transliterate(targ2);
if (!reverse.equals(reverse2)) {
logNotCanonical("Target-Source", cs, targ, targ2);
}
}
}
@ -375,11 +417,18 @@ public class RoundTripTest extends TestFmwk {
String cs = buf.toString();
String targ = targetToSource.transliterate(cs);
String reverse = sourceToTarget.transliterate(targ);
if (!isReceivingSource(targ) && !failTargSource.get(c) && !failTargSource.get(d)) {
if (!isReceivingSource(targ) && !failTargSource.get(c) && !failTargSource.get(d)
|| includesSome(badCharacters, targ)) {
logWrongScript("Target-Source", cs, targ);
} else if (!isSame(cs, reverse) && !failRound.get(c) && !failRound.get(d)
&& !roundtripExclusions.contains(c) && !roundtripExclusions.contains(d)) {
logRoundTripFailure(cs, targ, reverse);
} else {
String targ2 = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
String reverse2 = sourceToTarget.transliterate(targ2);
if (!reverse.equals(reverse2)) {
logNotCanonical("Target-Source", cs, targ, targ2);
}
}
}
}
@ -398,6 +447,20 @@ public class RoundTripTest extends TestFmwk {
);
}
/**
 * Writes a canonical-equivalence failure line to the HTML report.
 * Each string is printed followed by its hex dump in parentheses.
 *
 * @param label direction label placed after the "Fail (can.equiv)" prefix
 * @param from  the string that was transliterated
 * @param to    the result obtained for {@code from}
 * @param toCan the alternate string reported next to {@code to}; callers pass
 *              the value derived via canonical decomposition
 * @throws TestTruncated once the running error count reaches the error limit
 */
final void logNotCanonical(String label, String from, String to, String toCan) {
    // Count this failure first; abort the run when the limit is reached.
    if (++errorCount >= errorLimit) {
        throw new TestTruncated("Test truncated; too many failures");
    }
    out.println("<br>Fail (can.equiv)" + label + ": " +
        from + " (" +
        TestUtility.hex(from) + ") => " +
        to + " (" +
        TestUtility.hex(to) + ")" +
        // separator so the second string is not glued to the closing parenthesis
        " -- " +
        toCan + " (" +
        // hex dump of toCan itself (previously the dump of 'to' was repeated here)
        TestUtility.hex(toCan) + ")"
    );
}
final void logRoundTripFailure(String from, String to, String back) {
if (!legalSource.is(from)) return; // skip illegals

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/WriteCharts.java,v $
* $Date: 2001/11/02 23:49:39 $
* $Revision: 1.3 $
* $Date: 2001/11/03 05:44:32 $
* $Revision: 1.4 $
*
*****************************************************************************************
*/
@ -22,6 +22,7 @@ import java.io.*;
public class WriteCharts {
public static void main(String[] args) throws IOException {
String testSet = "";
if (args.length == 0) args = all;
for (int i = 0; i < args.length; ++i) {
// Enumeration enum = Transliterator.getAvailableIDs();
if (args[i].startsWith("[")) {
@ -33,6 +34,13 @@ public class WriteCharts {
}
}
// Transliterator IDs processed by default: main() substitutes this array
// for its arguments when it is invoked with no arguments.
static final String[] all = {
"Cyrillic-Latin", "Greek-Latin",
"el-Latin",
"Devanagari-Tamil", "Devanagari-Latin",
"Katakana-Latin", "Hiragana-Latin", "Hangul-Latin"
};
public static void print(String testSet, String rawId) throws IOException {
Transliterator t = Transliterator.getInstance(rawId);
String id = t.getID();
@ -70,12 +78,14 @@ public class WriteCharts {
Transliterator inverse = t.getInverse();
Transliterator hex = Transliterator.getInstance("Any-Hex");
// iterate through script
System.out.println("Transliterating " + sourceSet.toPattern(true)
+ " with " + Transliterator.getDisplayName(id));
UnicodeSet leftOverSet = new UnicodeSet(targetSet);
UnicodeSet privateUse = new UnicodeSet("[:private use:]");
Map map = new TreeMap();
@ -99,11 +109,15 @@ public class WriteCharts {
} else if (!ss.equals(rt)) {
group |= 4;
}
if ((group & 0x7F) != 0) flag = "</td><td>" + hex.transliterate(ss) + "; " + hex.transliterate(ts) + "; " + hex.transliterate(rt);
if (containsSome(privateUse, ts) || containsSome(privateUse, rt)) {
group |= 16;
}
map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0)) + ss,
"<tr><td>" + ss + "</td><td>" + ts + "</td><td>" + rt + "</td><td>" + flag + "</td></tr>" );
"<tr><td>" + ss + "<br><tt>" + hex.transliterate(ss) + "</tt></td><td>"
+ ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
+ rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>" );
}
}
@ -124,10 +138,13 @@ public class WriteCharts {
if (!isIn(rt, sourceSet)) {
group |= 8;
}
if ((group & 0x7F) != 0) flag = "</td><td>" + hex.transliterate(ts) + "; " + hex.transliterate(rt);
if (containsSome(privateUse, rt)) {
group |= 16;
}
map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0)) + ts,
"<tr><td>-</td><td>" + ts + "</td><td>" + rt + flag + "</td></tr>");
"<tr><td>-</td><td>" + ts + "<br><tt>" + hex.transliterate(ts) + "</tt></td><td>"
+ rt + "<br><tt>" + hex.transliterate(rt) + "</tt></td></tr>");
}
}
@ -145,10 +162,12 @@ public class WriteCharts {
out.println("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
out.println("<HTML><HEAD>");
out.println("<META content=\"text/html; charset=utf-8\" http-equiv=Content-Type></HEAD>");
out.println("<link rel='stylesheet' href='http://www.unicode.org/charts/uca/charts.css' type='text/css'>");
out.println("<BODY>");
String tableHeader = "<p><table border='1'><tr><th>Source</th><th>Target</th><th>Return</th></tr>";
String tableFooter = "</table></p>";
out.println("<h1>Testing Round Trip</h1>");
out.println("<h1>Round Trip</h1>");
out.println(tableHeader);
Iterator it = map.keySet().iterator();
@ -165,6 +184,7 @@ public class WriteCharts {
String title = "";
if ((group & 0x80) != 0) out.println("<hr><h1>Completeness</h1>");
else out.println("<hr><h1>Round Trip</h1>");
if ((group & 16) != 0) out.println("<h2>Errors: Contains Private Use Characters</h2>");
if ((group & 8) != 0) out.println("<h2>Possible Errors: Return not in Source Set</h2>");
if ((group & 4) != 0) out.println("<h2>Errors: Return not equal to Source</h2>");
if ((group & 2) != 0) out.println("<h2>Errors: Return not in Source Set</h2>");
@ -196,5 +216,16 @@ public class WriteCharts {
return true;
}
/**
 * Tests whether a string contains at least one code point from a set.
 *
 * @param set the set to test membership against
 * @param s   the string whose code points are examined
 * @return {@code true} if any code point of {@code s} is in {@code set},
 *         {@code false} otherwise (including when {@code s} is empty)
 */
public static boolean containsSome(UnicodeSet set, String s) {
    int cp;
    // Advance by the width of the code point just read; the step must be
    // computed from cp, not from the string index i.
    for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
        cp = UTF16.charAt(s, i);
        if (set.contains(cp)) return true;
    }
    return false;
}
}