ICU-13569 RBBI State table size reduction, move reduced tables to ICU4J; better testing and small bug fix in ICU4J builder.

X-SVN-Rev: 40926
This commit is contained in:
Andy Heninger 2018-02-15 23:21:54 +00:00
parent 4298f41e35
commit 25950362de
4 changed files with 34 additions and 11 deletions

View File

@ -30,10 +30,10 @@ public final class RBBIDataWrapper {
// read from the file. // read from the file.
// //
public RBBIDataHeader fHeader; public RBBIDataHeader fHeader;
public short fFTable[]; public short fFTable[];
short fRTable[]; public short fRTable[];
short fSFTable[]; public short fSFTable[];
short fSRTable[]; public short fSRTable[];
Trie2 fTrie; Trie2 fTrie;
String fRuleSource; String fRuleSource;
int fStatusTable[]; int fStatusTable[];
@ -339,7 +339,7 @@ public final class RBBIDataWrapper {
///CLOVER:OFF ///CLOVER:OFF
/* Debug function to display the break iterator data. */ /* Debug function to display the break iterator data. */
void dump(java.io.PrintStream out) { public void dump(java.io.PrintStream out) {
if (fFTable.length == 0) { if (fFTable.length == 0) {
// There is no table. Fail early for testing purposes. // There is no table. Fail early for testing purposes.
throw new NullPointerException(); throw new NullPointerException();

View File

@ -1046,7 +1046,7 @@ class RBBITableBuilder {
// Size of table size in shorts. // Size of table size in shorts.
// the "4" is the size of struct RBBIStateTableRow, the row header part only. // the "4" is the size of struct RBBIStateTableRow, the row header part only.
int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts.
int tableSize = getTableSize() / 2; int tableSize = getTableSize() / 2;
@ -1060,9 +1060,10 @@ class RBBITableBuilder {
table[RBBIDataWrapper.NUMSTATES] = (short)(numStates >>> 16); table[RBBIDataWrapper.NUMSTATES] = (short)(numStates >>> 16);
table[RBBIDataWrapper.NUMSTATES+1] = (short)(numStates & 0x0000ffff); table[RBBIDataWrapper.NUMSTATES+1] = (short)(numStates & 0x0000ffff);
// RBBIStateTable.fRowLen // RBBIStateTable.fRowLen. In bytes.
table[RBBIDataWrapper.ROWLEN] = (short)(rowLen >>> 16); int rowLenInBytes = rowLen * 2;
table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLen & 0x0000ffff); table[RBBIDataWrapper.ROWLEN] = (short)(rowLenInBytes >>> 16);
table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLenInBytes & 0x0000ffff);
// RBBIStateTable.fFlags // RBBIStateTable.fFlags
int flags = 0; int flags = 0;

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1 version https://git-lfs.github.com/spec/v1
oid sha256:f78382b447bb13c03234b53e18b013cea1d2ff6a0f71679885ee00d787003822 oid sha256:fa37df37ddad555dfc900d38890567a820d0efa27e72b9d9db81e914e2b558ec
size 12475101 size 12461103

View File

@ -18,6 +18,7 @@ package com.ibm.icu.dev.test.rbbi;
// or simply retired if it is no longer interesting. // or simply retired if it is no longer interesting.
import java.text.CharacterIterator; import java.text.CharacterIterator;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
@ -622,4 +623,25 @@ public class RBBITest extends TestFmwk {
} }
} }
} }
/**
 * Verifies that rebuilding the state tables from rule source for the standard
 * break iterator types yields the same tables as are imported from ICU4C as
 * part of the default data.
 *
 * For each of the character, word, sentence and line iterators (English locale),
 * the string form of the iterator is fed back into the RuleBasedBreakIterator
 * constructor, and each of the four state tables of the rebuilt iterator is
 * compared, element by element, against the corresponding table of the original.
 */
@Test
public void TestTableRebuild() {
    List<RuleBasedBreakIterator> breakIterators = new ArrayList<RuleBasedBreakIterator>();
    breakIterators.add((RuleBasedBreakIterator)BreakIterator.getCharacterInstance(Locale.ENGLISH));
    breakIterators.add((RuleBasedBreakIterator)BreakIterator.getWordInstance(Locale.ENGLISH));
    breakIterators.add((RuleBasedBreakIterator)BreakIterator.getSentenceInstance(Locale.ENGLISH));
    breakIterators.add((RuleBasedBreakIterator)BreakIterator.getLineInstance(Locale.ENGLISH));

    for (RuleBasedBreakIterator bi: breakIterators) {
        // Round-trip: take the iterator's string form and build a fresh iterator from it.
        String rules = bi.toString();
        RuleBasedBreakIterator bi2 = new RuleBasedBreakIterator(rules);

        // Each label names the table actually being compared, so that a failure
        // report points at the right table.
        assertTrue("Forward Table", Arrays.equals(bi.fRData.fFTable, bi2.fRData.fFTable));
        assertTrue("Reverse Table", Arrays.equals(bi.fRData.fRTable, bi2.fRData.fRTable));
        assertTrue("Safe Forward Table", Arrays.equals(bi.fRData.fSFTable, bi2.fRData.fSFTable));
        // fSRTable is the safe *reverse* table; the label previously read "SafeForward Table".
        assertTrue("Safe Reverse Table", Arrays.equals(bi.fRData.fSRTable, bi2.fRData.fSRTable));
    }
}
} }