ICU-5809 decreased testing time for Hangul, while maintaining coverage. A special prop flag reenables exhaustive, if needed.

X-SVN-Rev: 25905
2009-04-24 18:55:05 +00:00 · 2009-04-24 18:55:05 +00:00 · e9478b7207
commit e9478b7207
parent 9988ef9072
3 changed files with 138 additions and 10 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
@ -150,11 +150,122 @@ public class RoundTripTest extends TestFmwk {
    public void TestHangul() throws IOException {
        long start = System.currentTimeMillis();
        Test t = new Test("Latin-Hangul", 5);
-        if (getInclusion() < 10) t.setPairLimit(1000);
+        boolean TEST_ALL = "true".equalsIgnoreCase(getProperty("HangulRoundTripAll")); 
+        if (TEST_ALL && getInclusion() == 10) {
+            t.setPairLimit(Integer.MAX_VALUE); // only go to the limit if we have TEST_ALL and getInclusion
+        }
        t.test("[a-zA-Z]", "[\uAC00-\uD7A4]", "", this, new Legal());
        showElapsed(start, "TestHangul");
    }

+    /**
+     * This is a shorter version of the test for doubles, that allows us to skip lots of cases, but
+     * does check the ones that should cause problems (if any do).
+     */
+    public void TestHangul2() {
+        Transliterator lh = Transliterator.getInstance("Latin-Hangul");
+        Transliterator hl = lh.getInverse();
+        final UnicodeSet representativeHangul = getRepresentativeHangul();
+        for (UnicodeSetIterator it = new UnicodeSetIterator(representativeHangul); it.next();) {
+            assertRoundTripTransform("Transform", it.getString(), lh, hl);
+        }
+    }
+
+    private void assertRoundTripTransform(String message, String source, Transliterator lh, Transliterator hl) {
+        String to = hl.transform(source);
+        String back = lh.transform(to);
+        if (!source.equals(back)) {
+            String to2 = hl.transform(source.replaceAll("(.)", "$1 ").trim());
+            String to3 = hl.transform(back.replaceAll("(.)", "$1 ").trim());
+            assertEquals(message + " " + source + " [" + to + "/"+ to2 + "/"+ to3 + "]", source, back);
+        }
+    }
+
+    public static UnicodeSet getRepresentativeHangul() {
+        UnicodeSet extraSamples = new UnicodeSet("[\uCE20{\uAD6C\uB514}{\uAD73\uC774}{\uBB34\uB837}{\uBB3C\uC5FF}{\uC544\uAE4C}{\uC544\uB530}{\uC544\uBE60}{\uC544\uC2F8}{\uC544\uC9DC}{\uC544\uCC28}{\uC545\uC0AC}{\uC545\uC2F8}{\uC546\uCE74}{\uC548\uAC00}{\uC548\uC790}{\uC548\uC9DC}{\uC548\uD558}{\uC54C\uAC00}{\uC54C\uB530}{\uC54C\uB9C8}{\uC54C\uBC14}{\uC54C\uBE60}{\uC54C\uC0AC}{\uC54C\uC2F8}{\uC54C\uD0C0}{\uC54C\uD30C}{\uC54C\uD558}{\uC555\uC0AC}{\uC555\uC2F8}{\uC558\uC0AC}{\uC5C5\uC12F\uC501}{\uC5C6\uC5C8\uC2B5}]");
+        UnicodeSet sourceSet = new UnicodeSet();
+        addRepresentativeHangul(sourceSet, 2, false);
+        addRepresentativeHangul(sourceSet, 3, false);
+        addRepresentativeHangul(sourceSet, 2, true);
+        addRepresentativeHangul(sourceSet, 3, true);
+        // add the boundary cases; we want an example of each case of V + L and one example of each case of T+L
+
+        UnicodeSet more = getRepresentativeBoundaryHangul();
+        sourceSet.addAll(more);
+        sourceSet.addAll(extraSamples);
+        return sourceSet;
+    }
+
+    private static UnicodeSet getRepresentativeBoundaryHangul() {
+        UnicodeSet resultToAddTo = new UnicodeSet();
+        // U+1100 HANGUL CHOSEONG KIYEOK
+        // U+1161 HANGUL JUNGSEONG A
+        UnicodeSet L = new UnicodeSet("[:hst=L:]");
+        UnicodeSet V = new UnicodeSet("[:hst=V:]");
+        UnicodeSet T = new UnicodeSet("[:hst=T:]");
+
+        String prefixLV = "\u1100\u1161";
+        String prefixL = "\u1100";
+        String suffixV = "\u1161";
+        String nullL = "\u110B"; // HANGUL CHOSEONG IEUNG
+
+        UnicodeSet L0 = new UnicodeSet("[\u1100\u110B]");
+
+        // do all combinations of L0 + V + nullL + V
+
+        for (UnicodeSetIterator iL0 = new UnicodeSetIterator(L0); iL0.next();) {
+            for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {
+                for (UnicodeSetIterator iV2 = new UnicodeSetIterator(V); iV2.next();) {
+                    String sample = iL0.getString() + iV.getString() + nullL + iV2.getString();
+                    String trial = Normalizer.compose(sample, false);
+                    if (trial.length() == 2) {
+                        resultToAddTo.add(trial);
+                    }
+                }
+            }
+        }
+
+        for (UnicodeSetIterator iL = new UnicodeSetIterator(L); iL.next();) {
+            // do all combinations of "g" + V + L + "a"
+            final String suffix = iL.getString() + suffixV;
+            for (UnicodeSetIterator iV = new UnicodeSetIterator(V); iV.next();) {
+                String sample = prefixL + iV.getString() + suffix;
+                String trial = Normalizer.compose(sample, false);
+                if (trial.length() == 2) {
+                    resultToAddTo.add(trial);
+                }
+            }
+            // do all combinations of "ga" + T + L + "a"
+            for (UnicodeSetIterator iT = new UnicodeSetIterator(T); iT.next();) {
+                String sample = prefixLV + iT.getString() + suffix;
+                String trial = Normalizer.compose(sample, false);
+                if (trial.length() == 2) {
+                    resultToAddTo.add(trial);
+                }
+            }
+        }
+        return resultToAddTo;
+    }
+
+    private static void addRepresentativeHangul(UnicodeSet resultToAddTo, int leng, boolean noFirstConsonant) {
+        UnicodeSet notYetSeen = new UnicodeSet();
+        for (char c = '\uAC00'; c <  '\uD7AF'; ++c) {
+            String charStr = String.valueOf(c);
+            String decomp = Normalizer.decompose(charStr, false);
+            if (decomp.length() != leng) {
+                continue; // only take one length at a time
+            }
+            if (decomp.startsWith("\u110B ") != noFirstConsonant) {
+                continue;
+            }
+            if (!notYetSeen.containsAll(decomp)) {
+                resultToAddTo.add(c);
+                notYetSeen.addAll(decomp);
+            }
+        }
+    }
+
+
    public void TestHan() throws UnsupportedEncodingException, FileNotFoundException {
        try{
            UnicodeSet exemplars = LocaleData.getExemplarSet(new ULocale("zh"),0);
@ -946,7 +1057,7 @@ public class RoundTripTest extends TestFmwk {
        private String transliteratorID;
        private int errorLimit = 500;
        private int errorCount = 0;
-        private int pairLimit  = 0x10000;
+        private long pairLimit  = 1000000; // make default be 1M.
        private int density = 100;
        UnicodeSet sourceRange;
        UnicodeSet targetRange;
@ -1206,7 +1317,7 @@ public class RoundTripTest extends TestFmwk {
            checkSourceTargetSingles(failSourceTarg);

            boolean quickRt = checkSourceTargetDoubles(failSourceTarg);
-            
+
            UnicodeSet failTargSource = new UnicodeSet();
            UnicodeSet failRound = new UnicodeSet();

@ -1284,6 +1395,7 @@ public class RoundTripTest extends TestFmwk {
        private boolean checkSourceTargetDoubles(UnicodeSet failSourceTarg) {
            log.logln("Checking that source characters convert to target - Doubles");
            out.println("<h3>Checking that source characters convert to target - Doubles</h3>");
+            long count = 0;

            /*
            for (char c = 0; c < 0xFFFF; ++c) {
@ -1309,10 +1421,12 @@ public class RoundTripTest extends TestFmwk {
                        !sourceRange.contains(d)) continue;
                    if (failSourceTarg.get(d)) continue;
                 */
+                log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
                usi2.reset(sourceRangeMinusFailures, quickRt, density);

                while (usi2.next()) {
                    int d = usi2.codepoint;
+                    ++count;

                    String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
                    String targ = sourceToTarget.transliterate(cs);
@ -1334,7 +1448,7 @@ public class RoundTripTest extends TestFmwk {
            }
            return quickRt;
        }
-        
+
        void checkTargetSourceSingles(UnicodeSet failTargSource, UnicodeSet failRound) {
            log.logln("Checking that target characters convert to source and back - Singles");
            out.println("<h3>Checking that target characters convert to source and back - Singles</h3>");
@ -1390,7 +1504,7 @@ public class RoundTripTest extends TestFmwk {
                UnicodeSet failRound) {
            log.logln("Checking that target characters convert to source and back - Doubles");
            out.println("<h3>Checking that target characters convert to source and back - Doubles</h3>");
-            int count = 0;
+            long count = 0;

            UnicodeSet targetRangeMinusFailures = new UnicodeSet(targetRange);
            targetRangeMinusFailures.removeAll(failTargSource);
@ -1402,15 +1516,12 @@ public class RoundTripTest extends TestFmwk {
                if (TestUtility.isUnassigned(c) ||
                    !targetRange.contains(c)) continue;
             */
-
+            
            usi.reset(targetRangeMinusFailures, quickRt, density);

            while (usi.next()) {
                int c = usi.codepoint;

-                if (++count > pairLimit) {
-                    throw new TestTruncated("Test truncated at " + pairLimit + " x 64k pairs");
-                }
                //log.log(TestUtility.hex(c));

                /*
@ -1418,11 +1529,17 @@ public class RoundTripTest extends TestFmwk {
                    if (TestUtility.isUnassigned(d) ||
                        !targetRange.contains(d)) continue;
                 */
+                log.logln(count + "/" + pairLimit + " Checking starting with " + UTF16.valueOf(c));
                usi2.reset(targetRangeMinusFailures, quickRt, density);

                while (usi2.next()) {
+                    
                    int d = usi2.codepoint;
                    if (d < 0) break;
+                    
+                    if (++count > pairLimit) {
+                        throw new TestTruncated("Test truncated at " + pairLimit);
+                    }

                    String cs = UTF16.valueOf(c) + UTF16.valueOf(d);
                    String targ = targetToSource.transliterate(cs);
@ -1637,4 +1754,6 @@ public class RoundTripTest extends TestFmwk {
    //          return super.isSource(c);
    //      }
    //  }
+
+
 }
--- a/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java
+++ b/icu4j/src/com/ibm/icu/dev/test/util/BagFormatter.java
@ -839,7 +839,7 @@ public class BagFormatter {
    }

    public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {
-        File file = new File(dir + filename);
+        File file = new File(dir, filename);
        if (SHOW_FILES && log != null) {
            log.println("Opening File: "
                + file.getCanonicalPath());
--- a/icu4j/src/com/ibm/icu/text/UnicodeSetIterator.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSetIterator.java
@ -220,6 +220,15 @@ public class UnicodeSetIterator {
    private UnicodeSet set;
    private int endRange = 0;
    private int range = 0;
+    
+    /**
+     * @internal
+     * @deprecated This API is ICU internal only.
+     */
+    public UnicodeSet getSet() {
+        return set;
+    }
+    
    /**
     * @internal
     * @deprecated This API is ICU internal only.