ICU-3295 RBBI runtime port to Java
X-SVN-Rev: 15029
This commit is contained in:
parent
4514814ae6
commit
b6b23502af
@ -194,7 +194,7 @@
|
|||||||
<target name ="coreData" depends="init">
|
<target name ="coreData" depends="init">
|
||||||
<copy todir="${build.dir}/com/ibm/icu/impl/data">
|
<copy todir="${build.dir}/com/ibm/icu/impl/data">
|
||||||
<fileset dir="${src.dir}/com/ibm/icu/impl/data"
|
<fileset dir="${src.dir}/com/ibm/icu/impl/data"
|
||||||
includes="Transliterator_*.txt,*.icu,*.spp"
|
includes="Transliterator_*.txt,*.icu,*.spp,*.brk"
|
||||||
excludes="**/CVS/**/*,Transliterator_Han_Latin_*.txt"/>
|
excludes="**/CVS/**/*,Transliterator_Han_Latin_*.txt"/>
|
||||||
</copy>
|
</copy>
|
||||||
</target>
|
</target>
|
||||||
|
@ -8,6 +8,7 @@ package com.ibm.icu.dev.test.rbbi;
|
|||||||
|
|
||||||
import com.ibm.icu.dev.test.*;
|
import com.ibm.icu.dev.test.*;
|
||||||
import com.ibm.icu.text.BreakIterator;
|
import com.ibm.icu.text.BreakIterator;
|
||||||
|
import com.ibm.icu.text.RuleBasedBreakIterator_Old;
|
||||||
import java.text.StringCharacterIterator;
|
import java.text.StringCharacterIterator;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Vector;
|
import java.util.Vector;
|
||||||
@ -385,6 +386,8 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
//=========================================================================
|
//=========================================================================
|
||||||
|
|
||||||
public void TestWordBreak() {
|
public void TestWordBreak() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)wordBreak;
|
||||||
Vector wordSelectionData = new Vector();
|
Vector wordSelectionData = new Vector();
|
||||||
|
|
||||||
wordSelectionData.addElement("12,34");
|
wordSelectionData.addElement("12,34");
|
||||||
@ -465,6 +468,10 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
|
|
||||||
generalIteratorTest(wordBreak, wordSelectionData);
|
generalIteratorTest(wordBreak, wordSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @bug 4097779
|
* @bug 4097779
|
||||||
@ -514,6 +521,8 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
* @bug 4117554
|
* @bug 4117554
|
||||||
*/
|
*/
|
||||||
public void TestBug4117554Words() {
|
public void TestBug4117554Words() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)wordBreak;
|
||||||
Vector wordSelectionData = new Vector();
|
Vector wordSelectionData = new Vector();
|
||||||
|
|
||||||
// this is a test for bug #4117554: the ideographic iteration mark (U+3005) should
|
// this is a test for bug #4117554: the ideographic iteration mark (U+3005) should
|
||||||
@ -524,8 +533,14 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
|
|
||||||
generalIteratorTest(wordBreak, wordSelectionData);
|
generalIteratorTest(wordBreak, wordSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void TestSentenceBreak() {
|
public void TestSentenceBreak() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)sentenceBreak;
|
||||||
Vector sentenceSelectionData = new Vector();
|
Vector sentenceSelectionData = new Vector();
|
||||||
|
|
||||||
sentenceSelectionData.addElement("This is a simple sample sentence. ");
|
sentenceSelectionData.addElement("This is a simple sample sentence. ");
|
||||||
@ -559,11 +574,18 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
|
|
||||||
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @bug 4113835
|
* @bug 4113835
|
||||||
*/
|
*/
|
||||||
public void TestBug4113835() {
|
public void TestBug4113835() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)sentenceBreak;
|
||||||
|
|
||||||
Vector sentenceSelectionData = new Vector();
|
Vector sentenceSelectionData = new Vector();
|
||||||
|
|
||||||
// test for bug #4113835: \n and \r count as spaces, not as paragraph breaks
|
// test for bug #4113835: \n and \r count as spaces, not as paragraph breaks
|
||||||
@ -571,6 +593,10 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
|
|
||||||
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @bug 4111338
|
* @bug 4111338
|
||||||
@ -598,6 +624,8 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
* @bug 4117554
|
* @bug 4117554
|
||||||
*/
|
*/
|
||||||
public void TestBug4117554Sentences() {
|
public void TestBug4117554Sentences() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)sentenceBreak;
|
||||||
Vector sentenceSelectionData = new Vector();
|
Vector sentenceSelectionData = new Vector();
|
||||||
|
|
||||||
// Treat fullwidth variants of .!? the same as their
|
// Treat fullwidth variants of .!? the same as their
|
||||||
@ -618,11 +646,17 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
|
|
||||||
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @bug 4158381
|
* @bug 4158381
|
||||||
*/
|
*/
|
||||||
public void TestBug4158381() {
|
public void TestBug4158381() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)sentenceBreak;
|
||||||
Vector sentenceSelectionData = new Vector();
|
Vector sentenceSelectionData = new Vector();
|
||||||
|
|
||||||
// Don't break sentence after period if it isn't followed by a space
|
// Don't break sentence after period if it isn't followed by a space
|
||||||
@ -638,6 +672,10 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
|
|
||||||
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @bug 4143071
|
* @bug 4143071
|
||||||
@ -767,7 +805,6 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
lineSelectionData.addElement("mouse ");
|
lineSelectionData.addElement("mouse ");
|
||||||
lineSelectionData.addElement("(one)");
|
lineSelectionData.addElement("(one)");
|
||||||
lineSelectionData.addElement("(two)\n");
|
lineSelectionData.addElement("(two)\n");
|
||||||
|
|
||||||
generalIteratorTest(lineBreak, lineSelectionData);
|
generalIteratorTest(lineBreak, lineSelectionData);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -775,6 +812,8 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
* @bug 4035266
|
* @bug 4035266
|
||||||
*/
|
*/
|
||||||
public void TestBug4035266() {
|
public void TestBug4035266() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)lineBreak;
|
||||||
Vector lineSelectionData = new Vector();
|
Vector lineSelectionData = new Vector();
|
||||||
|
|
||||||
lineSelectionData.addElement("The ");
|
lineSelectionData.addElement("The ");
|
||||||
@ -786,11 +825,17 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
|
|
||||||
generalIteratorTest(lineBreak, lineSelectionData);
|
generalIteratorTest(lineBreak, lineSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @bug 4098467
|
* @bug 4098467
|
||||||
*/
|
*/
|
||||||
public void TestBug4098467Lines() {
|
public void TestBug4098467Lines() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)lineBreak;
|
||||||
Vector lineSelectionData = new Vector();
|
Vector lineSelectionData = new Vector();
|
||||||
|
|
||||||
// What follows is a string of Korean characters (I found it in the Yellow Pages
|
// What follows is a string of Korean characters (I found it in the Yellow Pages
|
||||||
@ -810,6 +855,10 @@ public class BreakIteratorTest extends TestFmwk
|
|||||||
|
|
||||||
generalIteratorTest(lineBreak, lineSelectionData);
|
generalIteratorTest(lineBreak, lineSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void TestThaiLineBreak() {
|
public void TestThaiLineBreak() {
|
||||||
Vector lineSelectionData = new Vector();
|
Vector lineSelectionData = new Vector();
|
||||||
@ -949,6 +998,8 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
* @bug 4217703
|
* @bug 4217703
|
||||||
*/
|
*/
|
||||||
public void TestBug4217703() {
|
public void TestBug4217703() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)lineBreak;
|
||||||
Vector lineSelectionData = new Vector();
|
Vector lineSelectionData = new Vector();
|
||||||
|
|
||||||
// There shouldn't be a line break between sentence-ending punctuation
|
// There shouldn't be a line break between sentence-ending punctuation
|
||||||
@ -965,6 +1016,10 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
|
|
||||||
generalIteratorTest(lineBreak, lineSelectionData);
|
generalIteratorTest(lineBreak, lineSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static final String graveS = "S\u0300";
|
private static final String graveS = "S\u0300";
|
||||||
private static final String acuteBelowI = "i\u0317";
|
private static final String acuteBelowI = "i\u0317";
|
||||||
@ -1091,6 +1146,8 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void TestBug4146175Sentences() {
|
public void TestBug4146175Sentences() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)lineBreak;
|
||||||
Vector sentenceSelectionData = new Vector();
|
Vector sentenceSelectionData = new Vector();
|
||||||
|
|
||||||
// break between periods and opening punctuation even when there's no
|
// break between periods and opening punctuation even when there's no
|
||||||
@ -1104,6 +1161,10 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
|
|
||||||
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void TestBug4146175Lines() {
|
public void TestBug4146175Lines() {
|
||||||
Vector lineSelectionData = new Vector();
|
Vector lineSelectionData = new Vector();
|
||||||
@ -1116,6 +1177,8 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void TestBug4214367() {
|
public void TestBug4214367() {
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)wordBreak;
|
||||||
Vector wordSelectionData = new Vector();
|
Vector wordSelectionData = new Vector();
|
||||||
|
|
||||||
// the hiragana and katakana iteration marks and the long vowel mark
|
// the hiragana and katakana iteration marks and the long vowel mark
|
||||||
@ -1125,6 +1188,10 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
|
|
||||||
generalIteratorTest(wordBreak, wordSelectionData);
|
generalIteratorTest(wordBreak, wordSelectionData);
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static final String cannedTestChars
|
private static final String cannedTestChars
|
||||||
= "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
|
= "\u0000\u0001\u0002\u0003\u0004 !\"#$%&()+-01234<=>ABCDE[]^_`abcde{}|\u00a0\u00a2"
|
||||||
@ -1143,15 +1210,23 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
public void TestWordInvariants()
|
public void TestWordInvariants()
|
||||||
{
|
{
|
||||||
BreakIterator e = BreakIterator.getWordInstance();
|
BreakIterator e = BreakIterator.getWordInstance();
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)e;
|
||||||
doBreakInvariantTest(e, cannedTestChars + "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"
|
doBreakInvariantTest(e, cannedTestChars + "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"
|
||||||
+ "\u30a3\u4e00\u4e01\u4e02");
|
+ "\u30a3\u4e00\u4e01\u4e02");
|
||||||
doOtherInvariantTest(e, cannedTestChars + "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"
|
doOtherInvariantTest(e, cannedTestChars + "\',.\u3041\u3042\u3043\u309b\u309c\u30a1\u30a2"
|
||||||
+ "\u30a3\u4e00\u4e01\u4e02");
|
+ "\u30a3\u4e00\u4e01\u4e02");
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException ex) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void TestLineInvariants()
|
public void TestLineInvariants()
|
||||||
{
|
{
|
||||||
BreakIterator e = BreakIterator.getLineInstance();
|
BreakIterator e = BreakIterator.getLineInstance();
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)e;
|
||||||
String testChars = cannedTestChars + ".,;:\u3001\u3002\u3041\u3042\u3043\u3044\u3045"
|
String testChars = cannedTestChars + ".,;:\u3001\u3002\u3041\u3042\u3043\u3044\u3045"
|
||||||
+ "\u30a3\u4e00\u4e01\u4e02";
|
+ "\u30a3\u4e00\u4e01\u4e02";
|
||||||
doBreakInvariantTest(e, testChars);
|
doBreakInvariantTest(e, testChars);
|
||||||
@ -1227,15 +1302,25 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException ex) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void TestCharacterInvariants()
|
public void TestCharacterInvariants()
|
||||||
{
|
{
|
||||||
BreakIterator e = BreakIterator.getCharacterInstance();
|
BreakIterator e = BreakIterator.getCharacterInstance();
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)e;
|
||||||
doBreakInvariantTest(e, cannedTestChars + "\u1100\u1101\u1102\u1160\u1161\u1162\u11a8"
|
doBreakInvariantTest(e, cannedTestChars + "\u1100\u1101\u1102\u1160\u1161\u1162\u11a8"
|
||||||
+ "\u11a9\u11aa");
|
+ "\u11a9\u11aa");
|
||||||
doOtherInvariantTest(e, cannedTestChars + "\u1100\u1101\u1102\u1160\u1161\u1162\u11a8"
|
doOtherInvariantTest(e, cannedTestChars + "\u1100\u1101\u1102\u1160\u1161\u1162\u11a8"
|
||||||
+ "\u11a9\u11aa");
|
+ "\u11a9\u11aa");
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException ex) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void TestEmptyString()
|
public void TestEmptyString()
|
||||||
{
|
{
|
||||||
@ -1264,6 +1349,8 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
String precedingChars = "([{\u00ab$\u00a5\u00a3\u00a4\u2018\u201a\u201c\u201e\u201b\u201f";
|
String precedingChars = "([{\u00ab$\u00a5\u00a3\u00a4\u2018\u201a\u201c\u201e\u201b\u201f";
|
||||||
String followingChars = ")]}\u00bb!%,.\u3001\u3002\u3063\u3083\u3085\u3087\u30c3\u30e3\u30e5\u30e7\u30fc:;\u309b\u309c\u3005\u309d\u309e\u30fd\u30fe\u2019\u201d\u00b0\u2032\u2033\u2034\u2030\u2031\u2103\u2109\u00a2\u0300\u0301\u0302";
|
String followingChars = ")]}\u00bb!%,.\u3001\u3002\u3063\u3083\u3085\u3087\u30c3\u30e3\u30e5\u30e7\u30fc:;\u309b\u309c\u3005\u309d\u309e\u30fd\u30fe\u2019\u201d\u00b0\u2032\u2033\u2034\u2030\u2031\u2103\u2109\u00a2\u0300\u0301\u0302";
|
||||||
BreakIterator iter = BreakIterator.getLineInstance(Locale.JAPAN);
|
BreakIterator iter = BreakIterator.getLineInstance(Locale.JAPAN);
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)iter;
|
||||||
|
|
||||||
for (int i = 0; i < precedingChars.length(); i++) {
|
for (int i = 0; i < precedingChars.length(); i++) {
|
||||||
testString.setCharAt(1, precedingChars.charAt(i));
|
testString.setCharAt(1, precedingChars.charAt(i));
|
||||||
@ -1297,6 +1384,10 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
+ "' (" + ((int)(followingChars.charAt(i))) + ")");
|
+ "' (" + ((int)(followingChars.charAt(i))) + ")");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bug 4638433
|
* Bug 4638433
|
||||||
@ -1309,6 +1400,8 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
* 0x0218-0x0233 which have been added since Unicode 3.0.0.
|
* 0x0218-0x0233 which have been added since Unicode 3.0.0.
|
||||||
*/
|
*/
|
||||||
iter = BreakIterator.getWordInstance(Locale.US);
|
iter = BreakIterator.getWordInstance(Locale.US);
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old olb = (RuleBasedBreakIterator_Old)iter;
|
||||||
iter.setText("\u0216\u0217\u0218\u0219\u021A");
|
iter.setText("\u0216\u0217\u0218\u0219\u021A");
|
||||||
i = iter.first();
|
i = iter.first();
|
||||||
i = iter.next();
|
i = iter.next();
|
||||||
@ -1386,6 +1479,10 @@ lineSelectionData.addElement("(\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\
|
|||||||
errln("Thai line break failure: failed to stop after \\u201D(Pf) at 4 and bounded at " + i);
|
errln("Thai line break failure: failed to stop after \\u201D(Pf) at 4 and bounded at " + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("New Break Iterator, skipping old test");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @bug 4068137
|
* @bug 4068137
|
||||||
|
@ -186,7 +186,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
|||||||
errln("ERROR: next()/following() at last position returned #"
|
errln("ERROR: next()/following() at last position returned #"
|
||||||
+ p + " and " + q + " instead of" + testString.length() + "\n");
|
+ p + " and " + q + " instead of" + testString.length() + "\n");
|
||||||
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getCharacterInstance(Locale.getDefault());
|
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getCharacterInstance(Locale.getDefault());
|
||||||
testString = "Write hindi here. \u092d\u093e\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 \u0939\u094c\u0964";
|
testString = "Write hindi here. \u092d\u093e\u0930\u0301 \u0938\u0941\u0902\u0926\u0930 \u0939\u094c\u0964";
|
||||||
logln("testing char iter - string:- \"" + testString + "\"");
|
logln("testing char iter - string:- \"" + testString + "\"");
|
||||||
charIter1.setText(testString);
|
charIter1.setText(testString);
|
||||||
p = charIter1.first();
|
p = charIter1.first();
|
||||||
@ -209,7 +209,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
|||||||
// hindi starts here
|
// hindi starts here
|
||||||
p = q;
|
p = q;
|
||||||
q = charIter1.next(4);
|
q = charIter1.next(4);
|
||||||
doTest(testString, p, q, 22, " \u092d\u093e\u0930\u0924");
|
doTest(testString, p, q, 22, " \u092d\u093e\u0930\u0301"); // Nonsense, but compatible between old and new rules.
|
||||||
p = q;
|
p = q;
|
||||||
q = charIter1.next(2);
|
q = charIter1.next(2);
|
||||||
doTest(testString, p, q, 26, " \u0938\u0941\u0902");
|
doTest(testString, p, q, 26, " \u0938\u0941\u0902");
|
||||||
@ -217,13 +217,13 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
|||||||
q = charIter1.following(24);
|
q = charIter1.following(24);
|
||||||
doTest(testString, 24, q, 26, "\u0941\u0902");
|
doTest(testString, 24, q, 26, "\u0941\u0902");
|
||||||
q = charIter1.following(20);
|
q = charIter1.following(20);
|
||||||
doTest(testString, 20, q, 21, "\u0930");
|
doTest(testString, 20, q, 22, "\u0930\u0301");
|
||||||
p = charIter1.following(charIter1.last());
|
p = charIter1.following(charIter1.last());
|
||||||
q = charIter1.next(charIter1.last());
|
q = charIter1.next(charIter1.last());
|
||||||
if (p != RuleBasedBreakIterator.DONE || q != RuleBasedBreakIterator.DONE)
|
if (p != RuleBasedBreakIterator.DONE || q != RuleBasedBreakIterator.DONE)
|
||||||
errln("ERROR: following()/next() at last position returned #"
|
errln("ERROR: following()/next() at last position returned #"
|
||||||
+ p + " and " + q + " instead of" + testString.length());
|
+ p + " and " + q + " instead of" + testString.length());
|
||||||
testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000.";
|
testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This costs $20,00,000.";
|
||||||
RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getSentenceInstance(Locale.getDefault());
|
RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getSentenceInstance(Locale.getDefault());
|
||||||
logln("testing sentence iter - String:- \"" + testString + "\"");
|
logln("testing sentence iter - String:- \"" + testString + "\"");
|
||||||
sentIter1.setText(testString);
|
sentIter1.setText(testString);
|
||||||
@ -243,7 +243,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
|||||||
doTest(testString, p, q, 60, "how are you? I'am fine. Thankyou. How are you doing? ");
|
doTest(testString, p, q, 60, "how are you? I'am fine. Thankyou. How are you doing? ");
|
||||||
p = q;
|
p = q;
|
||||||
q = sentIter1.next();
|
q = sentIter1.next();
|
||||||
doTest(testString, p, q, 83, "This\n costs $20,00,000.");
|
doTest(testString, p, q, 83, "This costs $20,00,000.");
|
||||||
q = sentIter1.following(1);
|
q = sentIter1.following(1);
|
||||||
doTest(testString, 1, q, 7, "ello! ");
|
doTest(testString, 1, q, 7, "ello! ");
|
||||||
q = sentIter1.following(10);
|
q = sentIter1.following(10);
|
||||||
@ -324,7 +324,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
|||||||
p = wordIter1.preceding(wordIter1.first());
|
p = wordIter1.preceding(wordIter1.first());
|
||||||
if (p != RuleBasedBreakIterator.DONE)
|
if (p != RuleBasedBreakIterator.DONE)
|
||||||
errln("ERROR: preceding() at starting position returned #" + p + " instead of 0");
|
errln("ERROR: preceding() at starting position returned #" + p + " instead of 0");
|
||||||
testString = "Write hindi here. \u092d\u093e\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 \u0939\u094c\u0964";
|
testString = "Write hindi here. \u092d\u093e\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 \u0939\u0301\u0964";
|
||||||
logln("testing character iteration for string \" " + testString + "\" \n");
|
logln("testing character iteration for string \" " + testString + "\" \n");
|
||||||
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getCharacterInstance(Locale.getDefault());
|
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getCharacterInstance(Locale.getDefault());
|
||||||
charIter1.setText(testString);
|
charIter1.setText(testString);
|
||||||
@ -335,7 +335,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
|||||||
doTest(testString, p, q, 31, "\u0964");
|
doTest(testString, p, q, 31, "\u0964");
|
||||||
p = q;
|
p = q;
|
||||||
q = charIter1.previous();
|
q = charIter1.previous();
|
||||||
doTest(testString, p, q, 29, "\u0939\u094c");
|
doTest(testString, p, q, 29, "\u0939\u0301");
|
||||||
q = charIter1.preceding(26);
|
q = charIter1.preceding(26);
|
||||||
doTest(testString, 26, q, 23, "\u0938\u0941\u0902");
|
doTest(testString, 26, q, 23, "\u0938\u0941\u0902");
|
||||||
q = charIter1.preceding(16);
|
q = charIter1.preceding(16);
|
||||||
@ -349,7 +349,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
|||||||
if (p != RuleBasedBreakIterator.DONE || q != RuleBasedBreakIterator.DONE)
|
if (p != RuleBasedBreakIterator.DONE || q != RuleBasedBreakIterator.DONE)
|
||||||
errln("ERROR: previous()/preceding() at starting position returned #"
|
errln("ERROR: previous()/preceding() at starting position returned #"
|
||||||
+ p + " and " + q + " instead of 0\n");
|
+ p + " and " + q + " instead of 0\n");
|
||||||
testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This\n costs $20,00,000.";
|
testString = "Hello! how are you? I'am fine. Thankyou. How are you doing? This costs $20,00,000.";
|
||||||
logln("testing sentence iter - String:- \"" + testString + "\"");
|
logln("testing sentence iter - String:- \"" + testString + "\"");
|
||||||
RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getSentenceInstance(Locale.getDefault());
|
RuleBasedBreakIterator sentIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getSentenceInstance(Locale.getDefault());
|
||||||
sentIter1.setText(testString);
|
sentIter1.setText(testString);
|
||||||
@ -357,7 +357,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
|||||||
if (p != testString.length())
|
if (p != testString.length())
|
||||||
errln("ERROR: last() returned" + p + "instead of " + testString.length());
|
errln("ERROR: last() returned" + p + "instead of " + testString.length());
|
||||||
q = sentIter1.previous();
|
q = sentIter1.previous();
|
||||||
doTest(testString, p, q, 60, "This\n costs $20,00,000.");
|
doTest(testString, p, q, 60, "This costs $20,00,000.");
|
||||||
p = q;
|
p = q;
|
||||||
q = sentIter1.previous();
|
q = sentIter1.previous();
|
||||||
doTest(testString, p, q, 41, "How are you doing? ");
|
doTest(testString, p, q, 41, "How are you doing? ");
|
||||||
@ -399,7 +399,7 @@ public class RBBIAPITest extends com.ibm.icu.dev.test.TestFmwk {
|
|||||||
* Tests the method IsBoundary() of RuleBasedBreakIterator
|
* Tests the method IsBoundary() of RuleBasedBreakIterator
|
||||||
**/
|
**/
|
||||||
public void TestIsBoundary() {
|
public void TestIsBoundary() {
|
||||||
String testString1 = "Write here. \u092d\u093e\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 \u0939\u094c\u0964";
|
String testString1 = "Write here. \u092d\u0301\u0930\u0924 \u0938\u0941\u0902\u0926\u0930 a\u0301u";
|
||||||
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getCharacterInstance(Locale.getDefault());
|
RuleBasedBreakIterator charIter1 = (RuleBasedBreakIterator) RuleBasedBreakIterator.getCharacterInstance(Locale.getDefault());
|
||||||
charIter1.setText(testString1);
|
charIter1.setText(testString1);
|
||||||
int bounds1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 20, 21, 22, 23, 25, 26};
|
int bounds1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 20, 21, 22, 23, 25, 26};
|
||||||
|
@ -9,6 +9,7 @@ package com.ibm.icu.dev.test.rbbi;
|
|||||||
//Regression testing of RuleBasedBreakIterator
|
//Regression testing of RuleBasedBreakIterator
|
||||||
import com.ibm.icu.dev.test.*;
|
import com.ibm.icu.dev.test.*;
|
||||||
import com.ibm.icu.text.RuleBasedBreakIterator;
|
import com.ibm.icu.text.RuleBasedBreakIterator;
|
||||||
|
import com.ibm.icu.text.RuleBasedBreakIterator_Old;
|
||||||
import java.util.Vector;
|
import java.util.Vector;
|
||||||
|
|
||||||
public class RBBITest extends TestFmwk
|
public class RBBITest extends TestFmwk
|
||||||
@ -43,6 +44,15 @@ public class RBBITest extends TestFmwk
|
|||||||
public void TestDefaultRuleBasedCharacterIteration(){
|
public void TestDefaultRuleBasedCharacterIteration(){
|
||||||
RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getCharacterInstance();
|
RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getCharacterInstance();
|
||||||
logln("Testing the RBBI for character iteration by using default rules");
|
logln("Testing the RBBI for character iteration by using default rules");
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old obi = (RuleBasedBreakIterator_Old)rbbi;
|
||||||
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
// Bail out if using new RBBI implementation
|
||||||
|
logln("Test Skipped.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
//fetch the rules used to create the above RuleBasedBreakIterator
|
//fetch the rules used to create the above RuleBasedBreakIterator
|
||||||
String defaultRules=rbbi.toString();
|
String defaultRules=rbbi.toString();
|
||||||
|
|
||||||
@ -172,6 +182,14 @@ public class RBBITest extends TestFmwk
|
|||||||
public void TestDefaultRuleBasedWordIteration(){
|
public void TestDefaultRuleBasedWordIteration(){
|
||||||
logln("Testing the RBBI for word iteration using default rules");
|
logln("Testing the RBBI for word iteration using default rules");
|
||||||
RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getWordInstance();
|
RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getWordInstance();
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old obi = (RuleBasedBreakIterator_Old)rbbi;
|
||||||
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
// Bail out if using new RBBI implementation
|
||||||
|
logln("Test Skipped.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
//fetch the rules used to create the above RuleBasedBreakIterator
|
//fetch the rules used to create the above RuleBasedBreakIterator
|
||||||
String defaultRules=rbbi.toString();
|
String defaultRules=rbbi.toString();
|
||||||
|
|
||||||
@ -325,6 +343,14 @@ public class RBBITest extends TestFmwk
|
|||||||
logln("Testing the RBBI for sentence iteration using default rules");
|
logln("Testing the RBBI for sentence iteration using default rules");
|
||||||
RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getSentenceInstance();
|
RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getSentenceInstance();
|
||||||
//fetch the rules used to create the above RuleBasedBreakIterator
|
//fetch the rules used to create the above RuleBasedBreakIterator
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old obi = (RuleBasedBreakIterator_Old)rbbi;
|
||||||
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
// Bail out if using new RBBI implementation
|
||||||
|
logln("Test Skipped.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
String defaultRules=rbbi.toString();
|
String defaultRules=rbbi.toString();
|
||||||
RuleBasedBreakIterator sentIterDefault=null;
|
RuleBasedBreakIterator sentIterDefault=null;
|
||||||
try{
|
try{
|
||||||
@ -421,6 +447,14 @@ public class RBBITest extends TestFmwk
|
|||||||
logln("Testing the RBBI for line iteration using default rules");
|
logln("Testing the RBBI for line iteration using default rules");
|
||||||
RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getLineInstance();
|
RuleBasedBreakIterator rbbi=(RuleBasedBreakIterator)RuleBasedBreakIterator.getLineInstance();
|
||||||
//fetch the rules used to create the above RuleBasedBreakIterator
|
//fetch the rules used to create the above RuleBasedBreakIterator
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old obi = (RuleBasedBreakIterator_Old)rbbi;
|
||||||
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
// Bail out if using new RBBI implementation
|
||||||
|
logln("Test Skipped.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
String defaultRules=rbbi.toString();
|
String defaultRules=rbbi.toString();
|
||||||
RuleBasedBreakIterator lineIterDefault=null;
|
RuleBasedBreakIterator lineIterDefault=null;
|
||||||
try{
|
try{
|
||||||
@ -524,6 +558,15 @@ public class RBBITest extends TestFmwk
|
|||||||
// get overridden.
|
// get overridden.
|
||||||
rbbi.toString();
|
rbbi.toString();
|
||||||
RuleBasedBreakIterator lineIter=null;
|
RuleBasedBreakIterator lineIter=null;
|
||||||
|
try {
|
||||||
|
RuleBasedBreakIterator_Old obi = (RuleBasedBreakIterator_Old)rbbi;
|
||||||
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
// Bail out if using new RBBI implementation
|
||||||
|
logln("Test Skipped.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
try{
|
try{
|
||||||
lineIter = new RuleBasedBreakIterator(rules);
|
lineIter = new RuleBasedBreakIterator(rules);
|
||||||
}catch(IllegalArgumentException iae){
|
}catch(IllegalArgumentException iae){
|
||||||
@ -651,6 +694,14 @@ public class RBBITest extends TestFmwk
|
|||||||
public void TestAbbrRuleBasedWordIteration(){
|
public void TestAbbrRuleBasedWordIteration(){
|
||||||
logln("Testing the RBBI for word iteration by adding rules to support abbreviation");
|
logln("Testing the RBBI for word iteration by adding rules to support abbreviation");
|
||||||
RuleBasedBreakIterator rb =(RuleBasedBreakIterator)RuleBasedBreakIterator.getWordInstance();
|
RuleBasedBreakIterator rb =(RuleBasedBreakIterator)RuleBasedBreakIterator.getWordInstance();
|
||||||
|
try {
|
||||||
|
// This test won't work with the new break iterators. Cast will fail in this case.
|
||||||
|
RuleBasedBreakIterator_Old obi = (RuleBasedBreakIterator_Old) rb;
|
||||||
|
}
|
||||||
|
catch (ClassCastException e) {
|
||||||
|
logln("Test skipped.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
String wrules2="$abbr=((Mr.)|(Mrs.)|(Ms.)|(Dr.)|(U.S.));" + // abbreviations.
|
String wrules2="$abbr=((Mr.)|(Mrs.)|(Ms.)|(Dr.)|(U.S.));" + // abbreviations.
|
||||||
rb.toString() +
|
rb.toString() +
|
||||||
@ -701,6 +752,10 @@ public class RBBITest extends TestFmwk
|
|||||||
buffer.append(text);
|
buffer.append(text);
|
||||||
}
|
}
|
||||||
text = buffer.toString();
|
text = buffer.toString();
|
||||||
|
if (rbbi == null) {
|
||||||
|
errln("null iterator, test skipped.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
rbbi.setText(text);
|
rbbi.setText(text);
|
||||||
|
|
||||||
|
@ -29,11 +29,11 @@ public class BreakIteratorRules extends ListResourceBundle {
|
|||||||
// BreakIteratorClasses lists the class names to instantiate for each
|
// BreakIteratorClasses lists the class names to instantiate for each
|
||||||
// built-in type of BreakIterator
|
// built-in type of BreakIterator
|
||||||
{ "BreakIteratorClasses",
|
{ "BreakIteratorClasses",
|
||||||
new String[] { "RuleBasedBreakIterator", // character-break iterator class
|
new String[] { "RuleBasedBreakIterator_New", // character-break iterator class
|
||||||
"RuleBasedBreakIterator", // word-break iterator class
|
"RuleBasedBreakIterator_New", // word-break iterator class
|
||||||
"RuleBasedBreakIterator", // line-break iterator class
|
"RuleBasedBreakIterator_New", // line-break iterator class
|
||||||
"RuleBasedBreakIterator", // sentence-break iterator class
|
"RuleBasedBreakIterator_New", // sentence-break iterator class
|
||||||
"RuleBasedBreakIterator"} // Title-Case break iterator class
|
"RuleBasedBreakIterator_New"} // Title-Case break iterator class
|
||||||
},
|
},
|
||||||
|
|
||||||
// rules describing how to break between logical characters
|
// rules describing how to break between logical characters
|
||||||
|
@ -27,10 +27,10 @@ public class BreakIteratorRules_th extends ListResourceBundle {
|
|||||||
// iterator. Notice we're now using DictionaryBasedBreakIterator
|
// iterator. Notice we're now using DictionaryBasedBreakIterator
|
||||||
// for word and line breaking.
|
// for word and line breaking.
|
||||||
{ "BreakIteratorClasses",
|
{ "BreakIteratorClasses",
|
||||||
new String[] { "RuleBasedBreakIterator", // character-break iterator class
|
new String[] { "RuleBasedBreakIterator_New", // character-break iterator class
|
||||||
"DictionaryBasedBreakIterator", // word-break iterator class
|
"DictionaryBasedBreakIterator", // word-break iterator class
|
||||||
"DictionaryBasedBreakIterator", // line-break iterator class
|
"DictionaryBasedBreakIterator", // line-break iterator class
|
||||||
"RuleBasedBreakIterator" } // sentence-break iterator class
|
"RuleBasedBreakIterator_New" } // sentence-break iterator class
|
||||||
},
|
},
|
||||||
|
|
||||||
{ "WordBreakRules",
|
{ "WordBreakRules",
|
||||||
|
@ -18,6 +18,7 @@ import com.ibm.icu.impl.ICULocaleService;
|
|||||||
import com.ibm.icu.impl.ICUService;
|
import com.ibm.icu.impl.ICUService;
|
||||||
import com.ibm.icu.impl.ICUService.Factory;
|
import com.ibm.icu.impl.ICUService.Factory;
|
||||||
import com.ibm.icu.util.ULocale;
|
import com.ibm.icu.util.ULocale;
|
||||||
|
import com.ibm.icu.util.VersionInfo;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author Ram
|
* @author Ram
|
||||||
@ -76,10 +77,26 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
|
|||||||
}
|
}
|
||||||
static final ICULocaleService service = new BFService();
|
static final ICULocaleService service = new BFService();
|
||||||
|
|
||||||
|
// KIND_NAMES are used in synthesizing the resource name that holds the source
|
||||||
|
// break rules. For old-style (ICU 2.8 and previous) break iterators.
|
||||||
|
// The resources are com.ibm.icu.impl.data.BreakIteratorRules, and have
|
||||||
|
// names like "CharacterBreakRules", where the "Character" part of the
|
||||||
|
// name comes from here (this array).
|
||||||
private static final String[] KIND_NAMES = {
|
private static final String[] KIND_NAMES = {
|
||||||
"Character", "Word", "Line", "Sentence", "Title"
|
"Character", "Word", "Line", "Sentence", "Title"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** KIND_NAMES_2 are used in synthesizing the names for
|
||||||
|
* the precompiled break rules used with the new (ICU 3.0) RBBI.
|
||||||
|
* The fully assembled names look like icudt30b_char.brk, which is the
|
||||||
|
* file name of the brk file as produced by the ICU4C build.
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
private static final String[] KIND_NAMES_2 = {
|
||||||
|
"char", "word", "line", "sent", "title"
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
private static BreakIterator createBreakInstance(Locale locale, int kind) {
|
private static BreakIterator createBreakInstance(Locale locale, int kind) {
|
||||||
String prefix = KIND_NAMES[kind];
|
String prefix = KIND_NAMES[kind];
|
||||||
return createBreakInstance(locale, kind,
|
return createBreakInstance(locale, kind,
|
||||||
@ -97,8 +114,25 @@ final class BreakIteratorFactory extends BreakIterator.BreakIteratorServiceShim
|
|||||||
String[] classNames = bundle.getStringArray("BreakIteratorClasses");
|
String[] classNames = bundle.getStringArray("BreakIteratorClasses");
|
||||||
String rules = bundle.getString(rulesName);
|
String rules = bundle.getString(rulesName);
|
||||||
if (classNames[kind].equals("RuleBasedBreakIterator")) {
|
if (classNames[kind].equals("RuleBasedBreakIterator")) {
|
||||||
|
// Old style (2.8 and previous) Break Iterator.
|
||||||
|
// Not used by default, but if someone wants to specify the old class
|
||||||
|
// in some locale's resources, it should still work.
|
||||||
iter = new RuleBasedBreakIterator_Old(rules);
|
iter = new RuleBasedBreakIterator_Old(rules);
|
||||||
}
|
}
|
||||||
|
else if (classNames[kind].equals("RuleBasedBreakIterator_New")) {
|
||||||
|
try {
|
||||||
|
// Class for new RBBI engine.
|
||||||
|
// Set up path to precompiled rule data.
|
||||||
|
String rulesFileName =
|
||||||
|
"data/icudt" + VersionInfo.ICU_VERSION.getMajor() +
|
||||||
|
VersionInfo.ICU_VERSION.getMinor() + "b_" + KIND_NAMES_2[kind] + ".brk";
|
||||||
|
InputStream is = ICUData.getRequiredStream(rulesFileName);
|
||||||
|
iter = RuleBasedBreakIterator_New.getInstanceFromCompiledRules(is);
|
||||||
|
}
|
||||||
|
catch (IOException e) {
|
||||||
|
throw new IllegalArgumentException(e.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
|
else if (classNames[kind].equals("DictionaryBasedBreakIterator")) {
|
||||||
try {
|
try {
|
||||||
InputStream dictionary = ICUData.getStream(bundle.getString(dictionaryName));
|
InputStream dictionary = ICUData.getStream(bundle.getString(dictionaryName));
|
||||||
|
@ -8,16 +8,10 @@
|
|||||||
package com.ibm.icu.text;
|
package com.ibm.icu.text;
|
||||||
|
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.BufferedInputStream;
|
|
||||||
import java.io.DataInputStream;
|
import java.io.DataInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Locale;
|
|
||||||
|
|
||||||
import com.ibm.icu.util.RangeValueIterator;
|
|
||||||
import com.ibm.icu.util.VersionInfo;
|
import com.ibm.icu.util.VersionInfo;
|
||||||
import com.ibm.icu.lang.UCharacter;
|
|
||||||
import com.ibm.icu.lang.UCharacterCategory;
|
|
||||||
import com.ibm.icu.lang.UProperty;
|
|
||||||
import com.ibm.icu.impl.ICUData;
|
import com.ibm.icu.impl.ICUData;
|
||||||
import com.ibm.icu.impl.Trie;
|
import com.ibm.icu.impl.Trie;
|
||||||
import com.ibm.icu.impl.CharTrie;
|
import com.ibm.icu.impl.CharTrie;
|
||||||
@ -69,10 +63,14 @@ public class RBBIDataWrapper {
|
|||||||
|
|
||||||
// Getters for fields from the state table header
|
// Getters for fields from the state table header
|
||||||
//
|
//
|
||||||
final static int getNumStates(int table[]) {
|
final static int getNumStates(short table[]) {
|
||||||
return table[NUMSTATES]<<16 + (table[NUMSTATES+1]&0xffff);
|
int hi = table[NUMSTATES];
|
||||||
|
int lo = table[NUMSTATES+1];
|
||||||
|
int val = (hi<<16) + (lo&0x0000ffff);
|
||||||
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Data Header. A struct-like class with the fields from the RBBI data file header.
|
* Data Header. A struct-like class with the fields from the RBBI data file header.
|
||||||
*/
|
*/
|
||||||
@ -119,14 +117,14 @@ public class RBBIDataWrapper {
|
|||||||
|
|
||||||
static class TrieFoldingFunc implements Trie.DataManipulate {
|
static class TrieFoldingFunc implements Trie.DataManipulate {
|
||||||
public int getFoldingOffset(int data) {
|
public int getFoldingOffset(int data) {
|
||||||
if ((data & 0x8000) == 0) {
|
if ((data & 0x8000) != 0) {
|
||||||
return data & 0x7fff;
|
return data & 0x7fff;
|
||||||
} else {
|
} else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
static TrieFoldingFunc fTrieFoldingFunc;
|
static TrieFoldingFunc fTrieFoldingFunc = new TrieFoldingFunc();
|
||||||
|
|
||||||
|
|
||||||
RBBIDataWrapper() {
|
RBBIDataWrapper() {
|
||||||
@ -299,19 +297,148 @@ public class RBBIDataWrapper {
|
|||||||
/** Debug function to display the break iterator data. */
|
/** Debug function to display the break iterator data. */
|
||||||
void dump() {
|
void dump() {
|
||||||
System.out.println("RBBI Data Wrapper dump ...");
|
System.out.println("RBBI Data Wrapper dump ...");
|
||||||
|
System.out.println();
|
||||||
|
System.out.println("Forward State Table");
|
||||||
|
dumpTable(fFTable);
|
||||||
|
System.out.println("Reverse State Table");
|
||||||
|
dumpTable(fRTable);
|
||||||
|
System.out.println("Forward Safe Points Table");
|
||||||
|
dumpTable(fSFTable);
|
||||||
|
System.out.println("Reverse Safe Points Table");
|
||||||
|
dumpTable(fSRTable);
|
||||||
|
|
||||||
|
dumpCharCategories();
|
||||||
System.out.println("Source Rules: " + fRuleSource);
|
System.out.println("Source Rules: " + fRuleSource);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixed width int-to-string conversion.
|
||||||
|
* TODO: there must be easy built-in way to do this */
|
||||||
|
private static String intToString(int n, int width) {
|
||||||
|
StringBuffer dest = new StringBuffer(width);
|
||||||
|
dest.append(n);
|
||||||
|
while (dest.length() < width) {
|
||||||
|
dest.insert(0, ' ');
|
||||||
|
}
|
||||||
|
return dest.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Dump a state table. (A full set of RBBI rules has 4 state tables.) */
|
||||||
|
private void dumpTable(short table[]) {
|
||||||
|
int n;
|
||||||
|
int state;
|
||||||
|
String header = " Row Acc Look Tag";
|
||||||
|
for (n=0; n<fHeader.fCatCount; n++) {
|
||||||
|
header += intToString(n, 5);
|
||||||
|
}
|
||||||
|
System.out.println(header);
|
||||||
|
for (n=0; n<header.length(); n++) {
|
||||||
|
System.out.print("-");
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
|
for (state=0; state< getNumStates(table); state++) {
|
||||||
|
dumpRow(table, state);
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dump (for debug) a single row of an RBBI state table
|
||||||
|
* @param table
|
||||||
|
* @param state
|
||||||
|
* @internal
|
||||||
|
*/
|
||||||
|
private void dumpRow(short table[], int state) {
|
||||||
|
StringBuffer dest = new StringBuffer(fHeader.fCatCount*5 + 20);
|
||||||
|
dest.append(intToString(state, 4));
|
||||||
|
int row = getRowIndex(state);
|
||||||
|
if (table[row+ACCEPTING] != 0) {
|
||||||
|
dest.append(intToString(table[row+ACCEPTING], 5));
|
||||||
|
}else {
|
||||||
|
dest.append(" ");
|
||||||
|
}
|
||||||
|
if (table[row+LOOKAHEAD] != 0) {
|
||||||
|
System.out.println(dest);
|
||||||
|
dest.append(intToString(table[row+LOOKAHEAD], 5));
|
||||||
|
}else {
|
||||||
|
dest.append(" ");
|
||||||
|
}
|
||||||
|
dest.append(intToString(table[row+TAGIDX], 5));
|
||||||
|
|
||||||
|
for (int col=0; col<fHeader.fCatCount; col++) {
|
||||||
|
dest.append(intToString(table[row+NEXTSTATES+col], 5));
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println(dest);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dumpCharCategories() {
|
||||||
|
int n = fHeader.fCatCount;
|
||||||
|
String catStrings[] = new String[n+1];
|
||||||
|
int rangeStart = 0;
|
||||||
|
int rangeEnd = 0;
|
||||||
|
int lastCat = -1;
|
||||||
|
int char32;
|
||||||
|
int category;
|
||||||
|
int lastNewline[] = new int[n+1];
|
||||||
|
|
||||||
|
for (category = 0; category <= fHeader.fCatCount; category ++) {
|
||||||
|
catStrings[category] = "";
|
||||||
|
}
|
||||||
|
System.out.println("\nCharacter Categories");
|
||||||
|
System.out.println("--------------------");
|
||||||
|
for (char32 = 0; char32<=0x10ffff; char32++) {
|
||||||
|
category = fTrie.getCodePointValue(char32);
|
||||||
|
category &= ~0x4000; // Mask off dictionary bit.
|
||||||
|
if (category < 0 || category > fHeader.fCatCount) {
|
||||||
|
System.out.println("Error, bad category " + Integer.toHexString(category) +
|
||||||
|
" for char " + Integer.toHexString(char32));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (category == lastCat ) {
|
||||||
|
rangeEnd = char32;
|
||||||
|
} else {
|
||||||
|
if (lastCat >= 0) {
|
||||||
|
if (catStrings[lastCat].length() > lastNewline[lastCat] + 70) {
|
||||||
|
lastNewline[lastCat] = catStrings[lastCat].length() + 10;
|
||||||
|
catStrings[lastCat] += "\n ";
|
||||||
|
}
|
||||||
|
|
||||||
|
catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
|
||||||
|
if (rangeEnd != rangeStart) {
|
||||||
|
catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastCat = category;
|
||||||
|
rangeStart = rangeEnd = char32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catStrings[lastCat] += " " + Integer.toHexString(rangeStart);
|
||||||
|
if (rangeEnd != rangeStart) {
|
||||||
|
catStrings[lastCat] += "-" + Integer.toHexString(rangeEnd);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (category = 0; category <= fHeader.fCatCount; category ++) {
|
||||||
|
System.out.println (intToString(category, 5) + " " + catStrings[category]);
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
String s;
|
String s;
|
||||||
if (args.length == 0) {
|
if (args.length == 0) {
|
||||||
s = "icudt28b_char.brk";
|
s = "char";
|
||||||
} else {
|
} else {
|
||||||
s = args[0];
|
s = args[0];
|
||||||
}
|
}
|
||||||
System.out.println("RBBIDataWrapper.main(" + s + ") ");
|
System.out.println("RBBIDataWrapper.main(" + s + ") ");
|
||||||
|
|
||||||
|
String versionedName =
|
||||||
|
"icudt" + VersionInfo.ICU_VERSION.getMajor() +
|
||||||
|
VersionInfo.ICU_VERSION.getMinor() + "b_" + s + ".brk";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
RBBIDataWrapper This = RBBIDataWrapper.get(s);
|
RBBIDataWrapper This = RBBIDataWrapper.get(versionedName);
|
||||||
This.dump();
|
This.dump();
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
|
@ -7,7 +7,9 @@
|
|||||||
package com.ibm.icu.text;
|
package com.ibm.icu.text;
|
||||||
|
|
||||||
import java.text.CharacterIterator;
|
import java.text.CharacterIterator;
|
||||||
import java.text.StringCharacterIterator;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rule Based Break Iterator implementation.
|
* Rule Based Break Iterator implementation.
|
||||||
@ -27,7 +29,7 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
* The rule data for this BreakIterator instance
|
* The rule data for this BreakIterator instance
|
||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
private RBBIDataWrapper fData;
|
private RBBIDataWrapper fRData;
|
||||||
|
|
||||||
/** Index of the Rule {tag} values for the most recent match.
|
/** Index of the Rule {tag} values for the most recent match.
|
||||||
* @internal
|
* @internal
|
||||||
@ -61,7 +63,9 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
public Object clone()
|
public Object clone()
|
||||||
{
|
{
|
||||||
RuleBasedBreakIterator_New result = (RuleBasedBreakIterator_New) super.clone();
|
RuleBasedBreakIterator_New result = (RuleBasedBreakIterator_New) super.clone();
|
||||||
// TODO: real clone code
|
if (fText != null) {
|
||||||
|
fText = (CharacterIterator)fText.clone();
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,7 +75,26 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public boolean equals(Object that) {
|
public boolean equals(Object that) {
|
||||||
return false; // TODO:
|
try {
|
||||||
|
RuleBasedBreakIterator_New other = (RuleBasedBreakIterator_New) that;
|
||||||
|
if (fRData != other.fRData && (fRData == null || other.fRData == null)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (fRData != null && other.fRData != null &&
|
||||||
|
(!fRData.fRuleSource.equals(other.fRData.fRuleSource))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (fText == null && other.fText == null) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (fText == null || other.fText == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return fText.equals(other.fText);
|
||||||
|
}
|
||||||
|
catch(ClassCastException e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -81,8 +104,8 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
*/
|
*/
|
||||||
public String toString() {
|
public String toString() {
|
||||||
String retStr = null;
|
String retStr = null;
|
||||||
if (fData != null) {
|
if (fRData != null) {
|
||||||
retStr = fData.fRuleSource;
|
retStr = fRData.fRuleSource;
|
||||||
}
|
}
|
||||||
return retStr;
|
return retStr;
|
||||||
}
|
}
|
||||||
@ -94,9 +117,23 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
*/
|
*/
|
||||||
public int hashCode()
|
public int hashCode()
|
||||||
{
|
{
|
||||||
return 0; // TODO
|
return fRData.fRuleSource.hashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//=======================================================================
|
||||||
|
// Constructors & Factories
|
||||||
|
//=======================================================================
|
||||||
|
public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException {
|
||||||
|
RuleBasedBreakIterator_New This = new RuleBasedBreakIterator_New();
|
||||||
|
This.fRData = RBBIDataWrapper.get(is);
|
||||||
|
This.fText = new java.text.StringCharacterIterator(""); // Note: some old tests fail if fText is null
|
||||||
|
// on a newly created instance.
|
||||||
|
return This;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//=======================================================================
|
//=======================================================================
|
||||||
// BreakIterator overrides
|
// BreakIterator overrides
|
||||||
//=======================================================================
|
//=======================================================================
|
||||||
@ -192,8 +229,8 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
return BreakIterator.DONE;
|
return BreakIterator.DONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fData.fSRTable != null || fData.fSFTable != null) {
|
if (fRData.fSRTable != null || fRData.fSFTable != null) {
|
||||||
return handlePrevious(fData.fRTable);
|
return handlePrevious(fRData.fRTable);
|
||||||
}
|
}
|
||||||
|
|
||||||
// old rule syntax
|
// old rule syntax
|
||||||
@ -266,7 +303,7 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
|
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
|
||||||
if (fData.fSRTable != null) {
|
if (fRData.fSRTable != null) {
|
||||||
// Safe Point Reverse rules exist.
|
// Safe Point Reverse rules exist.
|
||||||
// This allows us to use the optimum algorithm.
|
// This allows us to use the optimum algorithm.
|
||||||
fText.setIndex(offset);
|
fText.setIndex(offset);
|
||||||
@ -275,20 +312,20 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
// this handles offset being between a supplementary character
|
// this handles offset being between a supplementary character
|
||||||
CINext32(fText);
|
CINext32(fText);
|
||||||
// handlePrevious will move most of the time to < 1 boundary away
|
// handlePrevious will move most of the time to < 1 boundary away
|
||||||
handlePrevious(fData.fSRTable);
|
handlePrevious(fRData.fSRTable);
|
||||||
result = next();
|
result = next();
|
||||||
while (result <= offset) {
|
while (result <= offset) {
|
||||||
result = next();
|
result = next();
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
if (fData.fSFTable != null) {
|
if (fRData.fSFTable != null) {
|
||||||
// No Safe point reverse table, but there is a safe pt forward table.
|
// No Safe point reverse table, but there is a safe pt forward table.
|
||||||
//
|
//
|
||||||
fText.setIndex(offset);
|
fText.setIndex(offset);
|
||||||
CIPrevious32(fText);
|
CIPrevious32(fText);
|
||||||
// handle next will give result >= offset
|
// handle next will give result >= offset
|
||||||
handleNext(fData.fSFTable);
|
handleNext(fRData.fSFTable);
|
||||||
// previous will give result 0 or 1 boundary away from offset,
|
// previous will give result 0 or 1 boundary away from offset,
|
||||||
// most of the time
|
// most of the time
|
||||||
// we have to
|
// we have to
|
||||||
@ -352,7 +389,7 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
// to carry out this operation
|
// to carry out this operation
|
||||||
|
|
||||||
int result;
|
int result;
|
||||||
if (fData.fSFTable != null) {
|
if (fRData.fSFTable != null) {
|
||||||
/// todo synwee
|
/// todo synwee
|
||||||
// new rule syntax
|
// new rule syntax
|
||||||
fText.setIndex(offset);
|
fText.setIndex(offset);
|
||||||
@ -360,19 +397,19 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
// safe point.
|
// safe point.
|
||||||
// this handles offset being between a supplementary character
|
// this handles offset being between a supplementary character
|
||||||
CIPrevious32(fText);
|
CIPrevious32(fText);
|
||||||
handleNext(fData.fSFTable);
|
handleNext(fRData.fSFTable);
|
||||||
result = previous();
|
result = previous();
|
||||||
while (result >= offset) {
|
while (result >= offset) {
|
||||||
result = previous();
|
result = previous();
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
if (fData.fSRTable != null) {
|
if (fRData.fSRTable != null) {
|
||||||
// backup plan if forward safe table is not available
|
// backup plan if forward safe table is not available
|
||||||
fText.setIndex(offset);
|
fText.setIndex(offset);
|
||||||
CINext32(fText);
|
CINext32(fText);
|
||||||
// handle previous will give result <= offset
|
// handle previous will give result <= offset
|
||||||
handlePrevious(fData.fSRTable);
|
handlePrevious(fRData.fSRTable);
|
||||||
|
|
||||||
// next will give result 0 or 1 boundary away from offset,
|
// next will give result 0 or 1 boundary away from offset,
|
||||||
// most of the time
|
// most of the time
|
||||||
@ -397,6 +434,19 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
return previous();
|
return previous();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Throw IllegalArgumentException unless begin <= offset < end.
|
||||||
|
* TODO: subclassing interface from old RBBI is not really usable.
|
||||||
|
* What to do with old protected functions tagged as stable?
|
||||||
|
* @stable ICU 2.0
|
||||||
|
*/
|
||||||
|
protected static final void checkOffset(int offset, CharacterIterator text) {
|
||||||
|
if (offset < text.getBeginIndex() || offset > text.getEndIndex()) {
|
||||||
|
throw new IllegalArgumentException("offset out of bounds");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the specfied position is a boundary position. As a side
|
* Returns true if the specfied position is a boundary position. As a side
|
||||||
* effect, leaves the iterator pointing to the first boundary position at
|
* effect, leaves the iterator pointing to the first boundary position at
|
||||||
@ -406,8 +456,10 @@ public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
|
|||||||
* @stable ICU 2.0
|
* @stable ICU 2.0
|
||||||
*/
|
*/
|
||||||
public boolean isBoundary(int offset) {
|
public boolean isBoundary(int offset) {
|
||||||
|
checkOffset(offset, fText);
|
||||||
|
|
||||||
// the beginning index of the iterator is always a boundary position by definition
|
// the beginning index of the iterator is always a boundary position by definition
|
||||||
if (fText == null || offset == fText.getBeginIndex()) {
|
if (offset == fText.getBeginIndex()) {
|
||||||
first(); // For side effects on current position, tag values.
|
first(); // For side effects on current position, tag values.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -502,8 +554,8 @@ public int getRuleStatus() {
|
|||||||
// Status val N-1 <-- the value we need to return
|
// Status val N-1 <-- the value we need to return
|
||||||
// The status values are sorted in ascending order.
|
// The status values are sorted in ascending order.
|
||||||
// This function returns the last (largest) of the array of status values.
|
// This function returns the last (largest) of the array of status values.
|
||||||
int idx = fLastRuleStatusIndex + fData.fStatusTable[fLastRuleStatusIndex];
|
int idx = fLastRuleStatusIndex + fRData.fStatusTable[fLastRuleStatusIndex];
|
||||||
int tagVal = fData.fStatusTable[idx];
|
int tagVal = fRData.fStatusTable[idx];
|
||||||
|
|
||||||
return tagVal;
|
return tagVal;
|
||||||
}
|
}
|
||||||
@ -532,11 +584,11 @@ public int getRuleStatus() {
|
|||||||
*/
|
*/
|
||||||
public int getRuleStatusVec(int[] fillInArray) {
|
public int getRuleStatusVec(int[] fillInArray) {
|
||||||
makeRuleStatusValid();
|
makeRuleStatusValid();
|
||||||
int numStatusVals = fData.fStatusTable[fLastRuleStatusIndex];
|
int numStatusVals = fRData.fStatusTable[fLastRuleStatusIndex];
|
||||||
if (fillInArray != null) {
|
if (fillInArray != null) {
|
||||||
int numToCopy = Math.min(numStatusVals, fillInArray.length);
|
int numToCopy = Math.min(numStatusVals, fillInArray.length);
|
||||||
for (int i=0; i<numToCopy; i++) {
|
for (int i=0; i<numToCopy; i++) {
|
||||||
fillInArray[i] = fData.fStatusTable[fLastRuleStatusIndex + i + 1];
|
fillInArray[i] = fRData.fStatusTable[fLastRuleStatusIndex + i + 1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return numStatusVals;
|
return numStatusVals;
|
||||||
@ -618,8 +670,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
if (ci == null) {
|
if (ci == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int end = ci.getEndIndex();
|
if (ci.getIndex() >= ci.getEndIndex()) {
|
||||||
if (end == 0 || ci.getIndex() < end) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -637,7 +688,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
* @internal
|
* @internal
|
||||||
*/
|
*/
|
||||||
private int handleNext() {
|
private int handleNext() {
|
||||||
return handleNext(fData.fFTable);
|
return handleNext(fRData.fFTable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -663,7 +714,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
int state = START_STATE;
|
int state = START_STATE;
|
||||||
short category;
|
short category;
|
||||||
int c = CICurrent32(fText);
|
int c = CICurrent32(fText);
|
||||||
int row = fData.getRowIndex(state);
|
int row = fRData.getRowIndex(state);
|
||||||
int lookaheadStatus = 0;
|
int lookaheadStatus = 0;
|
||||||
int lookaheadTagIdx = 0;
|
int lookaheadTagIdx = 0;
|
||||||
|
|
||||||
@ -671,7 +722,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
|
|
||||||
// Character Category fetch for starting character.
|
// Character Category fetch for starting character.
|
||||||
// See comments on character category code within loop, below.
|
// See comments on character category code within loop, below.
|
||||||
category = (short)fData.fTrie.getCodePointValue(c);
|
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||||
if ((category & 0x4000) != 0) {
|
if ((category & 0x4000) != 0) {
|
||||||
// fDictionaryCharCount++;
|
// fDictionaryCharCount++;
|
||||||
category &= ~0x4000;
|
category &= ~0x4000;
|
||||||
@ -704,7 +755,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
// look up the current character's character category, which tells us
|
// look up the current character's character category, which tells us
|
||||||
// which column in the state table to look at.
|
// which column in the state table to look at.
|
||||||
//
|
//
|
||||||
category = (short)fData.fTrie.getCodePointValue(c);
|
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||||
|
|
||||||
// Clear the dictionary flag bit in the character's category.
|
// Clear the dictionary flag bit in the character's category.
|
||||||
// Note: not using the old style dictionary stuff in this Java engine.
|
// Note: not using the old style dictionary stuff in this Java engine.
|
||||||
@ -725,7 +776,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
// look up a state transition in the state table
|
// look up a state transition in the state table
|
||||||
// state = row->fNextState[category];
|
// state = row->fNextState[category];
|
||||||
state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
|
state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
|
||||||
row = fData.getRowIndex(state);
|
row = fRData.getRowIndex(state);
|
||||||
|
|
||||||
// Get the next character. Doing it here positions the iterator
|
// Get the next character. Doing it here positions the iterator
|
||||||
// to the correct position for recording matches in the code that
|
// to the correct position for recording matches in the code that
|
||||||
@ -793,15 +844,15 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
* handlePrevious
|
* handlePrevious
|
||||||
*/
|
*/
|
||||||
private int handlePrevious() {
|
private int handlePrevious() {
|
||||||
if (fText == null || fData == null) {
|
if (fText == null || fRData == null) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (fData.fRTable == null) {
|
if (fRData.fRTable == null) {
|
||||||
fText.first();
|
fText.first();
|
||||||
return fText.getIndex();
|
return fText.getIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
short stateTable[] = fData.fRTable;
|
short stateTable[] = fRData.fRTable;
|
||||||
int state = START_STATE;
|
int state = START_STATE;
|
||||||
int category;
|
int category;
|
||||||
int lastCategory = 0;
|
int lastCategory = 0;
|
||||||
@ -812,8 +863,8 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
int c = CICurrent32(fText);
|
int c = CICurrent32(fText);
|
||||||
int row;
|
int row;
|
||||||
|
|
||||||
row = fData.getRowIndex(state);
|
row = fRData.getRowIndex(state);
|
||||||
category = (short)fData.fTrie.getCodePointValue(c);
|
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||||
category &= ~0x4000; // Clear the dictionary bit, just in case.
|
category &= ~0x4000; // Clear the dictionary bit, just in case.
|
||||||
|
|
||||||
if (fTrace) {
|
if (fTrace) {
|
||||||
@ -829,7 +880,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
// save the last character's category and look up the current
|
// save the last character's category and look up the current
|
||||||
// character's category
|
// character's category
|
||||||
lastCategory = category;
|
lastCategory = category;
|
||||||
category = (short)fData.fTrie.getCodePointValue(c);
|
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||||
|
|
||||||
// Check the dictionary bit in the character's category.
|
// Check the dictionary bit in the character's category.
|
||||||
// Don't exist in this Java engine implementation. Clear the bit.
|
// Don't exist in this Java engine implementation. Clear the bit.
|
||||||
@ -848,7 +899,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
|
|
||||||
// look up a state transition in the backwards state table
|
// look up a state transition in the backwards state table
|
||||||
state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
|
state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
|
||||||
row = fData.getRowIndex(state);
|
row = fRData.getRowIndex(state);
|
||||||
|
|
||||||
continueOn: {
|
continueOn: {
|
||||||
if (stateTable[row + RBBIDataWrapper.ACCEPTING] == 0 &&
|
if (stateTable[row + RBBIDataWrapper.ACCEPTING] == 0 &&
|
||||||
@ -942,9 +993,9 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
boolean lookAheadHardBreak =
|
boolean lookAheadHardBreak =
|
||||||
(stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;
|
(stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;
|
||||||
|
|
||||||
int row = fData.getRowIndex(state);
|
int row = fRData.getRowIndex(state);
|
||||||
|
|
||||||
category = (short)fData.fTrie.getCodePointValue(c);
|
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||||
category &= ~0x4000; // Mask off dictionary bit.
|
category &= ~0x4000; // Mask off dictionary bit.
|
||||||
|
|
||||||
if (fTrace) {
|
if (fTrace) {
|
||||||
@ -965,7 +1016,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
// save the last character's category and look up the current
|
// save the last character's category and look up the current
|
||||||
// character's category
|
// character's category
|
||||||
lastCategory = category;
|
lastCategory = category;
|
||||||
category = (short)fData.fTrie.getCodePointValue(c);
|
category = (short)fRData.fTrie.getCodePointValue(c);
|
||||||
|
|
||||||
category &= ~0x4000; // Clear the dictionary bit flag
|
category &= ~0x4000; // Clear the dictionary bit flag
|
||||||
// (Should be unused; holdover from old RBBI)
|
// (Should be unused; holdover from old RBBI)
|
||||||
@ -982,7 +1033,7 @@ public int getRuleStatusVec(int[] fillInArray) {
|
|||||||
|
|
||||||
// look up a state transition in the backwards state table
|
// look up a state transition in the backwards state table
|
||||||
state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
|
state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
|
||||||
row = fData.getRowIndex(state);
|
row = fRData.getRowIndex(state);
|
||||||
|
|
||||||
if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
|
if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
|
||||||
// Match found, common case, could have lookahead so we move on to check it
|
// Match found, common case, could have lookahead so we move on to check it
|
||||||
|
Loading…
Reference in New Issue
Block a user