ICU-5809 decreased testing time for Hangul, while maintaining coverage. A special prop flag reenables exhaustive, if needed.

X-SVN-Rev: 25905
This commit is contained in:
Mark Davis 2009-04-24 18:55:05 +00:00
parent 9988ef9072
commit e9478b7207
3 changed files with 138 additions and 10 deletions

View File

@ -150,11 +150,122 @@ public class RoundTripTest extends TestFmwk {
public void TestHangul() throws IOException {
long start = System.currentTimeMillis();
Test t = new Test("Latin-Hangul", 5);
if (getInclusion() < 10) t.setPairLimit(1000);
boolean TEST_ALL = "true".equalsIgnoreCase(getProperty("HangulRoundTripAll"));
if (TEST_ALL && getInclusion() == 10) {
t.setPairLimit(Integer.MAX_VALUE); // only go to the limit if we have TEST_ALL and getInclusion
}
t.test("[a-zA-Z]", "[\uAC00-\uD7A4]", "", this, new Legal());
showElapsed(start, "TestHangul");
}
/**
* This is a shorter version of the test for doubles, that allows us to skip lots of cases, but
* does check the ones that should cause problems (if any do).
*/
public void TestHangul2() {
Transliterator lh = Transliterator.getInstance("Latin-Hangul");
Transliterator hl = lh.getInverse();
final UnicodeSet representativeHangul = getRepresentativeHangul();
for (UnicodeSetIterator it = new UnicodeSetIterator(representativeHangul); it.next();) {
assertRoundTripTransform("Transform", it.getString(), lh, hl);
}
}
private void assertRoundTripTransform(String message, String source, Transliterator lh, Transliterator hl) {
String to = hl.transform(source);
String back = lh.transform(to);
if (!source.equals(back)) {
String to2 = hl.transform(source.replaceAll("(.)", "$1 ").trim());
String to3 = hl.transform(back.replaceAll("(.)", "$1 ").trim());
assertEquals(message + " " + source + " [" + to + "/"+ to2 + "/"+ to3 + "]", source, back);
}
}
public static UnicodeSet getRepresentativeHangul() {
UnicodeSet extraSamples = new UnicodeSet("[\uCE20{\uAD6C\uB514}{\uAD73\uC774}{\uBB34\uB837}{\uBB3C\uC5FF}{\uC544\uAE4C}{\uC544\uB530}{\uC544\uBE60}{\uC544\uC2F8}{\uC544\uC9DC}{\uC544\uCC28}{\uC545\uC0AC}{\uC545\uC2F8}{\uC546\uCE74}{\uC548\uAC00}{\uC548\uC790}{\uC548\uC9DC}{\uC548\uD558}{\uC54C\uAC00}{\uC54C\uB530}{\uC54C\uB9C8}{\uC54C\uBC14}{\uC54C\uBE60}{\uC54C\uC0AC}{\uC54C\uC2F8}{\uC54C\uD0C0}{\uC54C\uD30C}{\uC54C\uD558}{\uC555\uC0AC}{\uC555\uC2F8}{\uC558\uC0AC}{\uC5C5\uC12F\uC501}{\uC5C6\uC5C8\uC2B5}]");
UnicodeSet sourceSet = new UnicodeSet();
addRepresentativeHangul(sourceSet, 2, false);
addRepresentativeHangul(sourceSet, 3, false);
addRepresentativeHangul(sourceSet, 2, true);
addRepresentativeHangul(sourceSet, 3, true);
// add the boundary cases; we want an example of each case of V + L and one example of each case of T+L
UnicodeSet more = getRepresentativeBoundaryHangul();
sourceSet.addAll(more);
sourceSet.addAll(extraSamples);
return sourceSet;
}
private static UnicodeSet getRepresentativeBoundaryHangul() {
UnicodeSet resultToAddTo = new UnicodeSet();
// U+1100 HANGUL CHOSEONG KIYEOK
// U+1161 HANGUL JUNGSEONG A
UnicodeSet L = new UnicodeSet("[:hst=L:]");
UnicodeSet V = new UnicodeSet("[:hst=V:]");
UnicodeSet T = new UnicodeSet("[:hst=T:]");
String prefixLV = "\u1100\u1161";
String prefixL = "\u1100";
String suffixV = "\u1161";
String nullL = "\u110B"; // HANGUL CHOSEONG IEUNG
UnicodeSet L0 = new UnicodeSet("[\u1100\u110B]");
// do all combinations of L0 + V + nullL + V
for (UnicodeSetIterator iL0 = new UnicodeSetIterator(L0); iL0.next();) {
for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {
for (UnicodeSetIterator iV2 = new UnicodeSetIterator(V); iV2.next();) {
String sample = iL0.getString() + iV.getString() + nullL + iV2.getString();
String trial = Normalizer.compose(sample, false);
if (trial.length() == 2) {
resultToAddTo.add(trial);
}
}
}
}
for (UnicodeSetIterator iL = new UnicodeSetIterator(L); iL.next();) {
// do all combinations of "g" + V + L + "a"
final String suffix = iL.getString() + suffixV;
for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {
String sample = prefixL + iV.getString() + suffix;
String trial = Normalizer.compose(sample, false);
if (trial.length() == 2) {
resultToAddTo.add(trial);
}
}
// do all combinations of "ga" + T + L + "a"
for (UnicodeSetIterator iT = new UnicodeSetIterator(T); iT.next();) {
String sample = prefixLV + iT.getString() + suffix;
String trial = Normalizer.compose(sample, false);
if (trial.length() == 2) {
resultToAddTo.add(trial);
}
}
}
return resultToAddTo;
}
private static void addRepresentativeHangul(UnicodeSet resultToAddTo, int leng, boolean noFirstConsonant) {
UnicodeSet notYetSeen = new UnicodeSet();
for (char c = '\uAC00'; c < '\uD7AF'; ++c) {
String charStr = String.valueOf(c);
String decomp = Normalizer.decompose(charStr, false);
if (decomp.length() != leng) {
continue; // only take one length at a time
}
if (decomp.startsWith("\u110B ") != noFirstConsonant) {
continue;
}
if (!notYetSeen.containsAll(decomp)) {
resultToAddTo.add(c);
notYetSeen.addAll(decomp);
}
}
}
public void TestHan() throws UnsupportedEncodingException, FileNotFoundException {
try{
UnicodeSet exemplars = LocaleData.getExemplarSet(new ULocale("zh"),0);
@ -946,7 +1057,7 @@ public class RoundTripTest extends TestFmwk {
private String transliteratorID;
private int errorLimit = 500;
private int errorCount = 0;
private int pairLimit = 0x10000;
private long pairLimit = 1000000; // make default be 1M.
private int density = 100;
UnicodeSet sourceRange;
UnicodeSet targetRange;
@ -1206,7 +1317,7 @@ public class RoundTripTest extends TestFmwk {
checkSourceTargetSingles(failSourceTarg);
boolean quickRt = checkSourceTargetDoubles(failSourceTarg);
UnicodeSet failTargSource = new UnicodeSet();
UnicodeSet failRound = new UnicodeSet();
@ -1284,6 +1395,7 @@ public class RoundTripTest extends TestFmwk {
private boolean checkSourceTargetDoubles(UnicodeSet failSourceTarg) {
log.logln("Checking that source characters convert to target - Doubles");
out.println("<h3>Checking that source characters convert to target - Doubles</h3>");
long count = 0;
/*
for (char c = 0; c < 0xFFFF; ++c) {
@ -1309,10 +1421,12 @@ public class RoundTripTest extends TestFmwk {
!sourceRange.contains(d)) continue;
if (failSourceTarg.get(d)) continue;
*/
log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
usi2.reset(sourceRangeMinusFailures, quickRt, density);
while (usi2.next()) {
int d = usi2.codepoint;
++count;
String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
String targ = sourceToTarget.transliterate(cs);
@ -1334,7 +1448,7 @@ public class RoundTripTest extends TestFmwk {
}
return quickRt;
}
void checkTargetSourceSingles(UnicodeSet failTargSource, UnicodeSet failRound) {
log.logln("Checking that target characters convert to source and back - Singles");
out.println("<h3>Checking that target characters convert to source and back - Singles</h3>");
@ -1390,7 +1504,7 @@ public class RoundTripTest extends TestFmwk {
UnicodeSet failRound) {
log.logln("Checking that target characters convert to source and back - Doubles");
out.println("<h3>Checking that target characters convert to source and back - Doubles</h3>");
int count = 0;
long count = 0;
UnicodeSet targetRangeMinusFailures = new UnicodeSet(targetRange);
targetRangeMinusFailures.removeAll(failTargSource);
@ -1402,15 +1516,12 @@ public class RoundTripTest extends TestFmwk {
if (TestUtility.isUnassigned(c) ||
!targetRange.contains(c)) continue;
*/
usi.reset(targetRangeMinusFailures, quickRt, density);
while (usi.next()) {
int c = usi.codepoint;
if (++count > pairLimit) {
throw new TestTruncated("Test truncated at " + pairLimit + " x 64k pairs");
}
//log.log(TestUtility.hex(c));
/*
@ -1418,11 +1529,17 @@ public class RoundTripTest extends TestFmwk {
if (TestUtility.isUnassigned(d) ||
!targetRange.contains(d)) continue;
*/
log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
usi2.reset(targetRangeMinusFailures, quickRt, density);
while (usi2.next()) {
int d = usi2.codepoint;
if (d < 0) break;
if (++count > pairLimit) {
throw new TestTruncated("Test truncated at " + pairLimit);
}
String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
String targ = targetToSource.transliterate(cs);
@ -1637,4 +1754,6 @@ public class RoundTripTest extends TestFmwk {
// return super.isSource(c);
// }
// }
}

View File

@ -839,7 +839,7 @@ public class BagFormatter {
}
public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {
File file = new File(dir + filename);
File file = new File(dir, filename);
if (SHOW_FILES && log != null) {
log.println("Opening File: "
+ file.getCanonicalPath());

View File

@ -220,6 +220,15 @@ public class UnicodeSetIterator {
private UnicodeSet set;
private int endRange = 0;
private int range = 0;
/**
* @internal
* @deprecated This API is ICU internal only.
*/
public UnicodeSet getSet() {
return set;
}
/**
* @internal
* @deprecated This API is ICU internal only.