ICU-3295 RBBI runtime port to Java
X-SVN-Rev: 15055
This commit is contained in:
parent
8fa8d3c4d6
commit
1015553ef2
@ -8,13 +8,15 @@ package com.ibm.icu.dev.test.rbbi;
|
||||
// Monkey testing of RuleBasedBreakIterator
|
||||
import com.ibm.icu.dev.test.*;
|
||||
import com.ibm.icu.text.RuleBasedBreakIterator;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.UCharacterIterator;
|
||||
import com.ibm.icu.impl.StringUCharacterIterator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import java.text.CharacterIterator;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
|
||||
/**
|
||||
@ -114,6 +116,36 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static class RBBIWordMonkey extends RBBIMonkeyKind {
|
||||
List charClasses() {
|
||||
return null; // TODO:
|
||||
}
|
||||
|
||||
void setText(String text) { // TODO:
|
||||
}
|
||||
|
||||
int next(int i) { // TODO:
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
static class RBBILineMonkey extends RBBIMonkeyKind {
|
||||
List charClasses() {
|
||||
return null; // TODO:
|
||||
}
|
||||
|
||||
void setText(String text) { // TODO:
|
||||
}
|
||||
|
||||
int next(int i) { // TODO:
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Set of line separators (Zl), paragraph separators (Zp), control characters (Cc)
// and format characters (Cf), with CR (U+000D) and LF (U+000A) removed.
// NOTE(review): "GC" presumably stands for grapheme cluster - confirm against the users of this set.
private static UnicodeSet GC_Control =
|
||||
new UnicodeSet("[[:Zl:][:Zp:][:Cc:][:Cf:]-[\\u000d\\u000a]]");
|
||||
@ -242,8 +274,283 @@ public class RBBITestMonkey extends TestFmwk {
|
||||
return pos;
|
||||
}
|
||||
|
||||
public void TestCharMonkey() {
|
||||
logln("Hello from CharMonkeyTest");
|
||||
|
||||
/**
|
||||
* random number generator. Not using Java's built-in Randoms for two reasons:
|
||||
* 1. Using this code allows obtaining the same sequences as those from the ICU4C monkey test.
|
||||
* 2. We need to get and restore the seed from values occurring in the middle
|
||||
* of a long sequence, to more easily reproduce failing cases.
|
||||
*/
|
||||
private static int m_seed = 1;
|
||||
private static int m_rand()
|
||||
{
|
||||
m_seed = m_seed * 1103515245 + 12345;
|
||||
return (int)(m_seed/65536) % 32768;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Run a RBBI monkey test. Common routine, for all break iterator types.
|
||||
* Parameters:
|
||||
* bi - the break iterator to use
|
||||
* mk - MonkeyKind, abstraction for obtaining expected results
|
||||
* name - Name of test (char, word, etc.) for use in error messages
|
||||
* seed - Seed for starting random number generator (parameter from user)
|
||||
* numIterations
|
||||
*/
|
||||
void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int numIterations) {
|
||||
int TESTSTRINGLEN = 500;
|
||||
StringBuffer testText = new StringBuffer();
|
||||
int numCharClasses;
|
||||
List chClasses;
|
||||
int[] expected = new int[TESTSTRINGLEN*2 + 1];
|
||||
int expectedCount = 0;
|
||||
boolean[] expectedBreaks = new boolean[TESTSTRINGLEN*2 + 1];
|
||||
boolean[] forwardBreaks = new boolean[TESTSTRINGLEN*2 + 1];
|
||||
boolean[] reverseBreaks = new boolean[TESTSTRINGLEN*2+1];
|
||||
boolean[] isBoundaryBreaks = new boolean[TESTSTRINGLEN*2+1];
|
||||
int i;
|
||||
int loopCount = 0;
|
||||
|
||||
m_seed = seed;
|
||||
|
||||
numCharClasses = mk.charClasses().size();
|
||||
chClasses = mk.charClasses();
|
||||
|
||||
// Verify that the character classes all have at least one member.
|
||||
for (i=0; i<numCharClasses; i++) {
|
||||
UnicodeSet s = (UnicodeSet)chClasses.get(i);
|
||||
if (s == null || s.size() == 0) {
|
||||
errln("Character Class " + i + " is null or of zero size.");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
while (loopCount < numIterations || numIterations == -1) {
|
||||
if (numIterations == -1 && loopCount % 10 == 0) {
|
||||
// If test is running in an infinite loop, display a periodic tic so
|
||||
// we can tell that it is making progress.
|
||||
System.out.print(".");
|
||||
}
|
||||
// Save current random number seed, so that we can recreate the random numbers
|
||||
// for this loop iteration in event of an error.
|
||||
seed = m_seed;
|
||||
|
||||
testText.setLength(0);
|
||||
// Populate a test string with data.
|
||||
for (i=0; i<TESTSTRINGLEN; i++) {
|
||||
int aClassNum = m_rand() % numCharClasses;
|
||||
UnicodeSet classSet = (UnicodeSet)chClasses.get(aClassNum);
|
||||
int charIdx = m_rand() % classSet.size();
|
||||
int c = classSet.charAt(charIdx);
|
||||
if (c < 0) { // TODO: deal with sets containing strings.
|
||||
errln("c < 0");
|
||||
}
|
||||
testText.append(c);
|
||||
}
|
||||
|
||||
Arrays.fill(expected, 0);
|
||||
Arrays.fill(expectedBreaks, false);
|
||||
Arrays.fill(forwardBreaks, false);
|
||||
Arrays.fill(reverseBreaks, false);
|
||||
Arrays.fill(isBoundaryBreaks, false);
|
||||
|
||||
// Calculate the expected results for this test string.
|
||||
mk.setText(testText.toString());
|
||||
expectedBreaks[0] = true;
|
||||
int breakPos = 0;
|
||||
expectedCount = 0;
|
||||
for (;;) {
|
||||
breakPos = mk.next(breakPos);
|
||||
if (breakPos == -1) {
|
||||
break;
|
||||
}
|
||||
if (breakPos > testText.length()) {
|
||||
errln("breakPos > testText.length()");
|
||||
}
|
||||
expectedBreaks[breakPos] = true;
|
||||
expected[expectedCount ++] = breakPos;
|
||||
}
|
||||
|
||||
// Find the break positions using forward iteration
|
||||
bi.setText(testText.toString());
|
||||
for (i=bi.first(); i != BreakIterator.DONE; i=bi.next()) {
|
||||
if (i < 0 || i > testText.length()) {
|
||||
errln(name + " break monkey test: Out of range value returned by breakIterator::next()");
|
||||
break;
|
||||
}
|
||||
forwardBreaks[i] = true;
|
||||
}
|
||||
|
||||
// Find the break positions using reverse iteration
|
||||
for (i=bi.last(); i != BreakIterator.DONE; i=bi.previous()) {
|
||||
if (i < 0 || i > testText.length()) {
|
||||
errln(name + " break monkey test: Out of range value returned by breakIterator.next()" + name);
|
||||
break;
|
||||
}
|
||||
reverseBreaks[i] = true;
|
||||
}
|
||||
|
||||
// Find the break positions using isBoundary() tests.
|
||||
for (i=0; i<=testText.length(); i++) {
|
||||
isBoundaryBreaks[i] = bi.isBoundary(i);
|
||||
}
|
||||
|
||||
|
||||
// Compare the expected and actual results.
|
||||
for (i=0; i<=testText.length(); i++) {
|
||||
String errorType = null;
|
||||
if (forwardBreaks[i] != expectedBreaks[i]) {
|
||||
errorType = "next()";
|
||||
} else if (reverseBreaks[i] != forwardBreaks[i]) {
|
||||
errorType = "previous()";
|
||||
} else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
|
||||
errorType = "isBoundary()";
|
||||
}
|
||||
|
||||
|
||||
if (errorType != null) {
|
||||
// Format a range of the test text that includes the failure as
|
||||
// a data item that can be included in the rbbi test data file.
|
||||
|
||||
// Start of the range is the last point where expected and actual results
|
||||
// both agreed that there was a break position.
|
||||
int startContext = i;
|
||||
int count = 0;
|
||||
for (;;) {
|
||||
if (startContext==0) { break; }
|
||||
startContext --;
|
||||
if (expectedBreaks[startContext]) {
|
||||
if (count == 2) break;
|
||||
count ++;
|
||||
}
|
||||
}
|
||||
|
||||
// End of range is two expected breaks past the start position.
|
||||
int endContext = i + 1;
|
||||
int ci;
|
||||
for (ci=0; ci<2; ci++) { // Number of items to include in error text.
|
||||
for (;;) {
|
||||
if (endContext >= testText.length()) {break;}
|
||||
if (expectedBreaks[endContext-1]) {
|
||||
if (count == 0) break;
|
||||
count --;
|
||||
}
|
||||
endContext ++;
|
||||
}
|
||||
}
|
||||
|
||||
// Format looks like "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
|
||||
StringBuffer errorText = new StringBuffer();
|
||||
errorText.append("<data>");
|
||||
|
||||
StringUCharacterIterator sci = new StringUCharacterIterator();
|
||||
sci.setText(errorText.toString());
|
||||
sci.setIndex(startContext);
|
||||
String hexChars = "0123456789abcdef";
|
||||
int c;
|
||||
int bn;
|
||||
while (true) {
|
||||
ci = sci.getIndex();
|
||||
c = sci.nextCodePoint();
|
||||
if (ci >= endContext) {
|
||||
break;
|
||||
}
|
||||
if (ci == i) {
|
||||
// This is the location of the error.
|
||||
errorText.append("<?>");
|
||||
} else if (expectedBreaks[ci]) {
|
||||
// This a non-error expected break position.
|
||||
errorText.append("<>");
|
||||
}
|
||||
if (c < 0x10000) {
|
||||
errorText.append("\\u");
|
||||
for (bn=12; bn>=0; bn-=4) {
|
||||
errorText.append(hexChars.charAt((((int)c)>>bn)&0xf));
|
||||
}
|
||||
} else {
|
||||
errorText.append("\\U");
|
||||
for (bn=28; bn>=0; bn-=4) {
|
||||
errorText.append(hexChars.charAt((((int)c)>>bn)&0xf));
|
||||
}
|
||||
}
|
||||
}
|
||||
errorText.append("<>");
|
||||
errorText.append("</data>\n");
|
||||
|
||||
// Output the error
|
||||
errln(name + " break monkey test error. " +
|
||||
(expectedBreaks[i]? "break expected but not found" : "break found but not expected") +
|
||||
"Operation = " + errorType + "; Random seed = " + seed + "; buf Idx = " + i + "\n" +
|
||||
errorText);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
loopCount++;
|
||||
}
|
||||
}
|
||||
|
||||
public void TestMonkey(/* String[] params */) {
|
||||
|
||||
int loopCount = 500;
|
||||
int seed = 1;
|
||||
String breakType = "all";
|
||||
boolean quick = true; // TODO: from test framework in C++. What's the equivalent here?
|
||||
// Locale locale("en");
|
||||
|
||||
if (quick == false) {
|
||||
loopCount = 10000;
|
||||
}
|
||||
|
||||
/*
|
||||
if (params) {
|
||||
UnicodeString p(params);
|
||||
loopCount = getIntParam("loop", p, loopCount);
|
||||
seed = getIntParam("seed", p, seed);
|
||||
|
||||
RegexMatcher m(" *type *= *(char|word|line|sent|title) *", p, 0, status);
|
||||
if (m.find()) {
|
||||
breakType = m.group(1, status);
|
||||
m.reset();
|
||||
p = m.replaceFirst("", status);
|
||||
}
|
||||
|
||||
m.reset(p);
|
||||
if (RegexMatcher("\\S", p, 0, status).find()) {
|
||||
// Each option is stripped out of the option string as it is processed.
|
||||
// All options have been checked. The option string should have been completely emptied..
|
||||
char buf[100];
|
||||
p.extract(buf, sizeof(buf), NULL, status);
|
||||
buf[sizeof(buf)-1] = 0;
|
||||
errln("Unrecognized or extra parameter: %s\n", buf);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
if (breakType == "char" || breakType == "all") {
|
||||
RBBICharMonkey m = new RBBICharMonkey();
|
||||
BreakIterator bi = BreakIterator.getCharacterInstance(Locale.US);
|
||||
//RunMonkey(bi, m, "char", seed, loopCount);
|
||||
}
|
||||
|
||||
if (breakType == "word" || breakType == "all") {
|
||||
logln("Word Break Monkey Test");
|
||||
RBBIWordMonkey m = new RBBIWordMonkey();
|
||||
BreakIterator bi = BreakIterator.getWordInstance(Locale.US);
|
||||
//RunMonkey(bi, m, "word", seed, loopCount);
|
||||
}
|
||||
|
||||
if (breakType == "line" || breakType == "all") {
|
||||
logln("Line Break Monkey Test");
|
||||
RBBILineMonkey m = new RBBILineMonkey();
|
||||
BreakIterator bi = BreakIterator.getLineInstance(Locale.US);
|
||||
if (params == null) {
|
||||
loopCount = 50;
|
||||
}
|
||||
//RunMonkey(bi, m, "line", seed, loopCount);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user