ICU-4472 add new simpler test cases to illustrate the problem

X-SVN-Rev: 17503
2005-04-25 22:39:30 +00:00 · 2005-04-25 22:39:30 +00:00 · 8fe398c368
commit 8fe398c368
parent 9ce9eda382
1 changed files with 25 additions and 1 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/normalizer/NormalizerRegressionTests.java
+++ b/icu4j/src/com/ibm/icu/dev/test/normalizer/NormalizerRegressionTests.java
@@ -16,7 +16,31 @@ public class NormalizerRegressionTests extends TestFmwk {
    }

    public void TestJB4472() {
+	// submitter's test case
 	String tamil = "\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe";
-	logln("Normalized: " + Normalizer.isNormalized(tamil, Normalizer.NFC, 0));
+ 	logln("Normalized: " + Normalizer.isNormalized(tamil, Normalizer.NFC, 0));
+
+	// markus's test case
+	// there is no precomposed 'b' + cedilla, so this is already in normalized form.
+	// but the isNormalized test identifies the cedilla as a 'maybe' and so tries
+	// to normalize the relevant substring ("b\u0327") and compare the result to the
+	// original.  the original code was passing in the start and length of the
+	// substring (3, 5-3 = 2) but the called code was expecting start and limit.
+	// it subtracted the start again to get what it thought was the length, but
+	// ended up with -1.  the loop was incrementing an index from 0 and testing
+	// against length, but 0 was never == -1 before it walked off the array end.
+
+	// a workaround in lieu of this patch is to catch the exception and always
+	// normalize.
+
+	// this should return true, since the string is normalized (and it should
+	// not throw an exception!)
+	String sample = "aaab\u0327";
+	logln("Normalized: " + Normalizer.isNormalized(sample, Normalizer.NFC, 0));
+
+	// this should return false, since the string is _not_ normalized (and it should
+	// not throw an exception!)
+	String sample2 = "aaac\u0327";
+	logln("Normalized: " + Normalizer.isNormalized(sample2, Normalizer.NFC, 0));
    }
 }