ICU-3295 RBBI runtime port to Java

X-SVN-Rev: 15055
This commit is contained in:
Andy Heninger 2004-04-27 06:09:07 +00:00
parent 8fa8d3c4d6
commit 1015553ef2

View File

@ -8,13 +8,15 @@ package com.ibm.icu.dev.test.rbbi;
// Monkey testing of RuleBasedBreakIterator
import com.ibm.icu.dev.test.*;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.UCharacterIterator;
import com.ibm.icu.impl.StringUCharacterIterator;
import com.ibm.icu.text.UnicodeSet;
import java.text.CharacterIterator;
import java.util.List;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Locale;
/**
@ -114,6 +116,36 @@ public class RBBITestMonkey extends TestFmwk {
}
}
/**
 * Reference ("monkey") implementation of word breaking.
 *
 * Not yet ported: every method is a placeholder. The placeholders return
 * values that the common test driver treats as "nothing to do" (no character
 * classes, no further break positions) so that an accidental call cannot
 * dereference null or loop forever.
 */
static class RBBIWordMonkey extends RBBIMonkeyKind {
    /** Returns the character classes used to build test text; empty until ported. */
    List charClasses() {
        return new ArrayList();  // TODO: supply the real word-break character classes.
    }

    /** Sets the text to be analyzed; currently ignored. */
    void setText(String text) {
        // TODO: remember the text once next() is implemented.
    }

    /**
     * Returns the next expected break position after i.
     * Always -1 for now, the value the test driver reads as "no more breaks".
     */
    int next(int i) {
        return -1;  // TODO: implement the reference word-break algorithm.
    }
}
/**
 * Reference ("monkey") implementation of line breaking.
 *
 * Not yet ported: every method is a placeholder. The placeholders return
 * values that the common test driver treats as "nothing to do" (no character
 * classes, no further break positions) so that an accidental call cannot
 * dereference null or loop forever.
 */
static class RBBILineMonkey extends RBBIMonkeyKind {
    /** Returns the character classes used to build test text; empty until ported. */
    List charClasses() {
        return new ArrayList();  // TODO: supply the real line-break character classes.
    }

    /** Sets the text to be analyzed; currently ignored. */
    void setText(String text) {
        // TODO: remember the text once next() is implemented.
    }

    /**
     * Returns the next expected break position after i.
     * Always -1 for now, the value the test driver reads as "no more breaks".
     */
    int next(int i) {
        return -1;  // TODO: implement the reference line-break algorithm.
    }
}
// Set of characters in general categories Zl, Zp, Cc and Cf, with
// CR (U+000D) and LF (U+000A) removed.
// NOTE(review): the "GC_" prefix presumably refers to grapheme-cluster
// boundary classes used by the character monkey -- confirm against its users.
private static UnicodeSet GC_Control =
new UnicodeSet("[[:Zl:][:Zp:][:Cc:][:Cf:]-[\\u000d\\u000a]]");
@ -242,8 +274,283 @@ public class RBBITestMonkey extends TestFmwk {
return pos;
}
public void TestCharMonkey() {
logln("Hello from CharMonkeyTest");
/**
 * Random number generator. Not using Java's built-in Randoms for two reasons:
 *   1. Using this code allows obtaining the same sequences as those from the ICU4C monkey test.
 *   2. We need to get and restore the seed from values occurring in the middle
 *      of a long sequence, to more easily reproduce failing cases.
 */
private static int m_seed = 1;

/**
 * Advances the generator one step and returns the next pseudo-random value.
 *
 * @return a value in the range 0..32767, never negative
 */
private static int m_rand()
{
    // int multiplication wraps modulo 2^32, giving the same bits as an
    // unsigned 32-bit computation.
    m_seed = m_seed * 1103515245 + 12345;
    // The seed must be read as an unsigned quantity. A signed divide by 65536
    // yields negative results whenever the top bit is set, and callers use the
    // result with % to index lists. The unsigned shift is the equivalent of an
    // unsigned divide by 65536.
    return (m_seed >>> 16) % 32768;
}
/**
 * Run a RBBI monkey test. Common routine, for all break iterator types.
 *
 * Each iteration builds a random test string from the monkey's character
 * classes, asks the monkey for the expected break positions, and compares
 * them with the positions reported by the break iterator through forward
 * iteration, reverse iteration and isBoundary(). The first mismatch in an
 * iteration is reported with errln(), together with the random seed in
 * effect at the start of that iteration and an escaped excerpt of the text.
 *
 * @param bi            the break iterator to use
 * @param mk            MonkeyKind, abstraction for obtaining expected results
 * @param name          Name of test (char, word, etc.) for use in error messages
 * @param seed          Seed for starting random number generator (parameter from user)
 * @param numIterations number of random strings to test; -1 runs until interrupted
 */
void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int numIterations) {
    int          TESTSTRINGLEN = 500;
    StringBuffer testText = new StringBuffer();
    int          numCharClasses;
    List         chClasses;
    // Sized at twice the character count because a supplementary character
    // occupies two chars in the test string.
    boolean[]    expectedBreaks   = new boolean[TESTSTRINGLEN*2 + 1];
    boolean[]    forwardBreaks    = new boolean[TESTSTRINGLEN*2 + 1];
    boolean[]    reverseBreaks    = new boolean[TESTSTRINGLEN*2 + 1];
    boolean[]    isBoundaryBreaks = new boolean[TESTSTRINGLEN*2 + 1];
    int          i;
    int          loopCount = 0;

    m_seed = seed;

    chClasses = mk.charClasses();
    if (chClasses == null || chClasses.isEmpty()) {
        // Without classes there is nothing to draw characters from, and the
        // modulo below would divide by zero.
        errln(name + " break monkey test: no character classes available.");
        return;
    }
    numCharClasses = chClasses.size();

    // Verify that the character classes all have at least one member.
    for (i=0; i<numCharClasses; i++) {
        UnicodeSet s = (UnicodeSet)chClasses.get(i);
        if (s == null || s.size() == 0) {
            errln("Character Class " + i + " is null or of zero size.");
            return;
        }
    }

    while (loopCount < numIterations || numIterations == -1) {
        if (numIterations == -1 && loopCount % 10 == 0) {
            // If test is running in an infinite loop, display a periodic tic so
            // we can tell that it is making progress.
            System.out.print(".");
        }
        // Save current random number seed, so that we can recreate the random numbers
        // for this loop iteration in event of an error.
        seed = m_seed;

        testText.setLength(0);
        // Populate a test string with data.
        for (i=0; i<TESTSTRINGLEN; i++) {
            int        aClassNum = m_rand() % numCharClasses;
            UnicodeSet classSet  = (UnicodeSet)chClasses.get(aClassNum);
            int        charIdx   = m_rand() % classSet.size();
            int        c         = classSet.charAt(charIdx);
            if (c < 0) {   // TODO: deal with sets containing strings.
                errln("c < 0");
                continue;  // Not a code point; do not put it into the text.
            }
            // Append the character itself. append(int) would append the
            // decimal digits of the code point value instead.
            testText.appendCodePoint(c);
        }

        Arrays.fill(expectedBreaks, false);
        Arrays.fill(forwardBreaks, false);
        Arrays.fill(reverseBreaks, false);
        Arrays.fill(isBoundaryBreaks, false);

        // Calculate the expected results for this test string.
        mk.setText(testText.toString());
        expectedBreaks[0] = true;
        int breakPos = 0;
        for (;;) {
            int prevPos = breakPos;
            breakPos = mk.next(prevPos);
            if (breakPos == -1) {
                break;
            }
            if (breakPos > testText.length()) {
                // The reference implementation itself is broken; the position
                // cannot be recorded and further comparison is meaningless.
                errln("breakPos > testText.length()");
                return;
            }
            if (breakPos <= prevPos) {
                // A position that does not advance would loop forever.
                errln(name + " break monkey test: expected break position did not advance past " + prevPos);
                return;
            }
            expectedBreaks[breakPos] = true;
        }

        // Find the break positions using forward iteration
        bi.setText(testText.toString());
        for (i=bi.first(); i != BreakIterator.DONE; i=bi.next()) {
            if (i < 0 || i > testText.length()) {
                errln(name + " break monkey test: Out of range value returned by breakIterator::next()");
                break;
            }
            forwardBreaks[i] = true;
        }

        // Find the break positions using reverse iteration
        for (i=bi.last(); i != BreakIterator.DONE; i=bi.previous()) {
            if (i < 0 || i > testText.length()) {
                errln(name + " break monkey test: Out of range value returned by breakIterator.previous()");
                break;
            }
            reverseBreaks[i] = true;
        }

        // Find the break positions using isBoundary() tests.
        for (i=0; i<=testText.length(); i++) {
            isBoundaryBreaks[i] = bi.isBoundary(i);
        }

        // Compare the expected and actual results.
        for (i=0; i<=testText.length(); i++) {
            String errorType = null;
            if (forwardBreaks[i] != expectedBreaks[i]) {
                errorType = "next()";
            } else if (reverseBreaks[i] != forwardBreaks[i]) {
                errorType = "previous()";
            } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
                errorType = "isBoundary()";
            }

            if (errorType != null) {
                // Format a range of the test text that includes the failure as
                // a data item that can be included in the rbbi test data file.

                // Start of the range: walk backwards from the failure over
                // earlier expected break positions, or to the start of text.
                int startContext = i;
                int count = 0;
                for (;;) {
                    if (startContext==0) { break; }
                    startContext --;
                    if (expectedBreaks[startContext]) {
                        if (count == 2) break;
                        count ++;
                    }
                }

                // End of the range: walk forwards from the failure over later
                // expected break positions, or to the end of text.
                int endContext = i + 1;
                int ci;
                for (ci=0; ci<2; ci++) {  // Number of items to include in error text.
                    for (;;) {
                        if (endContext >= testText.length()) {break;}
                        if (expectedBreaks[endContext-1]) {
                            if (count == 0) break;
                            count --;
                        }
                        endContext ++;
                    }
                }

                // Format looks like "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
                StringBuffer errorText = new StringBuffer();
                errorText.append("<data>");

                // Iterate over the test text (not the partially built error
                // text) so that the excerpt shows the characters under test.
                StringUCharacterIterator sci = new StringUCharacterIterator();
                sci.setText(testText.toString());
                sci.setIndex(startContext);
                String hexChars = "0123456789abcdef";
                int c;
                int bn;
                while (true) {
                    ci = sci.getIndex();
                    c  = sci.nextCodePoint();
                    if (ci >= endContext) {
                        break;
                    }
                    if (ci == i) {
                        // This is the location of the error.
                        errorText.append("<?>");
                    } else if (expectedBreaks[ci]) {
                        // This a non-error expected break position.
                        errorText.append("<>");
                    }
                    if (c < 0x10000) {
                        errorText.append("\\u");
                        for (bn=12; bn>=0; bn-=4) {
                            errorText.append(hexChars.charAt((c>>bn)&0xf));
                        }
                    } else {
                        errorText.append("\\U");
                        for (bn=28; bn>=0; bn-=4) {
                            errorText.append(hexChars.charAt((c>>bn)&0xf));
                        }
                    }
                }
                errorText.append("<>");
                errorText.append("</data>\n");

                // Output the error
                errln(name + " break monkey test error. " +
                      (expectedBreaks[i]? "break expected but not found" : "break found but not expected") +
                      ". Operation = " + errorType + "; Random seed = " + seed + "; buf Idx = " + i + "\n" +
                      errorText);
                break;  // Report only the first mismatch of this iteration.
            }
        }

        loopCount++;
    }
}
/**
 * Entry point for the break iterator monkey tests.
 *
 * Creates the reference monkey and the break iterator for each break type
 * (character, word, line). The calls into RunMonkey() are still disabled,
 * so at present this only constructs the objects and logs the test names.
 * Selecting loop count, seed and break type from test parameters has not
 * been ported yet; the original C++ code is kept below for reference.
 */
public void TestMonkey(/* String[] params */) {
    int     loopCount = 500;
    int     seed      = 1;
    String  breakType = "all";
    boolean quick     = true;  // TODO: from test framework in C++. What's the equivalent here?
    // Locale locale("en");

    if (!quick) {
        loopCount = 10000;
    }

    /*
    if (params) {
    UnicodeString p(params);
    loopCount = getIntParam("loop", p, loopCount);
    seed = getIntParam("seed", p, seed);
    RegexMatcher m(" *type *= *(char|word|line|sent|title) *", p, 0, status);
    if (m.find()) {
    breakType = m.group(1, status);
    m.reset();
    p = m.replaceFirst("", status);
    }
    m.reset(p);
    if (RegexMatcher("\\S", p, 0, status).find()) {
    // Each option is stripped out of the option string as it is processed.
    // All options have been checked. The option string should have been completely emptied..
    char buf[100];
    p.extract(buf, sizeof(buf), NULL, status);
    buf[sizeof(buf)-1] = 0;
    errln("Unrecognized or extra parameter: %s\n", buf);
    return;
    }
    }
    */

    // Compare string contents with equals(); == only compares references.
    if ("char".equals(breakType) || "all".equals(breakType)) {
        RBBICharMonkey m  = new RBBICharMonkey();
        BreakIterator  bi = BreakIterator.getCharacterInstance(Locale.US);
        //RunMonkey(bi, m, "char", seed, loopCount);
    }

    if ("word".equals(breakType) || "all".equals(breakType)) {
        logln("Word Break Monkey Test");
        RBBIWordMonkey m  = new RBBIWordMonkey();
        BreakIterator  bi = BreakIterator.getWordInstance(Locale.US);
        //RunMonkey(bi, m, "word", seed, loopCount);
    }

    if ("line".equals(breakType) || "all".equals(breakType)) {
        logln("Line Break Monkey Test");
        RBBILineMonkey m  = new RBBILineMonkey();
        BreakIterator  bi = BreakIterator.getLineInstance(Locale.US);
        // The C++ test lowers the count only when no parameters were given.
        // This port has no parameters yet, so that is always the case.
        loopCount = 50;
        //RunMonkey(bi, m, "line", seed, loopCount);
    }
}
}