From 25950362de911aca8f4d07f5c0a545f89cf116f0 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Thu, 15 Feb 2018 23:21:54 +0000 Subject: [PATCH] ICU-13569 RBBI State table size reduction, move reduced tables to ICU4J; better testing and small bug fix in ICU4J builder. X-SVN-Rev: 40926 --- .../src/com/ibm/icu/text/RBBIDataWrapper.java | 10 ++++----- .../com/ibm/icu/text/RBBITableBuilder.java | 9 ++++---- icu4j/main/shared/data/icudata.jar | 4 ++-- .../com/ibm/icu/dev/test/rbbi/RBBITest.java | 22 +++++++++++++++++++ 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java index cf39ca0c4e..6ef666f0bc 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java @@ -30,10 +30,10 @@ public final class RBBIDataWrapper { // read from the file. // public RBBIDataHeader fHeader; - public short fFTable[]; - short fRTable[]; - short fSFTable[]; - short fSRTable[]; + public short fFTable[]; + public short fRTable[]; + public short fSFTable[]; + public short fSRTable[]; Trie2 fTrie; String fRuleSource; int fStatusTable[]; @@ -339,7 +339,7 @@ public final class RBBIDataWrapper { ///CLOVER:OFF /* Debug function to display the break iterator data. */ - void dump(java.io.PrintStream out) { + public void dump(java.io.PrintStream out) { if (fFTable.length == 0) { // There is no table. Fail early for testing purposes. throw new NullPointerException(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java index 9130ad81b5..44b5e1a05f 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java @@ -1046,7 +1046,7 @@ class RBBITableBuilder { // Size of table size in shorts. 
// the "4" is the size of struct RBBIStateTableRow, the row header part only. - int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); + int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts. int tableSize = getTableSize() / 2; @@ -1060,9 +1060,10 @@ class RBBITableBuilder { table[RBBIDataWrapper.NUMSTATES] = (short)(numStates >>> 16); table[RBBIDataWrapper.NUMSTATES+1] = (short)(numStates & 0x0000ffff); - // RBBIStateTable.fRowLen - table[RBBIDataWrapper.ROWLEN] = (short)(rowLen >>> 16); - table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLen & 0x0000ffff); + // RBBIStateTable.fRowLen. In bytes. + int rowLenInBytes = rowLen * 2; + table[RBBIDataWrapper.ROWLEN] = (short)(rowLenInBytes >>> 16); + table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLenInBytes & 0x0000ffff); // RBBIStateTable.fFlags int flags = 0; diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar index 7cd2ee72c3..932eb6bf3b 100755 --- a/icu4j/main/shared/data/icudata.jar +++ b/icu4j/main/shared/data/icudata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f78382b447bb13c03234b53e18b013cea1d2ff6a0f71679885ee00d787003822 -size 12475101 +oid sha256:fa37df37ddad555dfc900d38890567a820d0efa27e72b9d9db81e914e2b558ec +size 12461103 diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java index e1dd1e3b28..4151b8146c 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java @@ -18,6 +18,7 @@ package com.ibm.icu.dev.test.rbbi; // or simply retired if it is no longer interesting. 
import java.text.CharacterIterator; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Locale; @@ -622,4 +623,25 @@ public class RBBITest extends TestFmwk { } } } + + @Test + public void TestTableRebuild() { + // Test to verify that rebuilding the state tables from rule source for the standard + // break iterator types yields the same tables as are imported from ICU4C as part of the default data. + List<RuleBasedBreakIterator> breakIterators = new ArrayList<RuleBasedBreakIterator>(); + breakIterators.add((RuleBasedBreakIterator)BreakIterator.getCharacterInstance(Locale.ENGLISH)); + breakIterators.add((RuleBasedBreakIterator)BreakIterator.getWordInstance(Locale.ENGLISH)); + breakIterators.add((RuleBasedBreakIterator)BreakIterator.getSentenceInstance(Locale.ENGLISH)); + breakIterators.add((RuleBasedBreakIterator)BreakIterator.getLineInstance(Locale.ENGLISH)); + + for (RuleBasedBreakIterator bi: breakIterators) { + String rules = bi.toString(); + RuleBasedBreakIterator bi2 = new RuleBasedBreakIterator(rules); + + assertTrue("Forward Table", Arrays.equals(bi.fRData.fFTable, bi2.fRData.fFTable)); + assertTrue("Reverse Table", Arrays.equals(bi.fRData.fRTable, bi2.fRData.fRTable)); + assertTrue("Safe Forward Table", Arrays.equals(bi.fRData.fSFTable, bi2.fRData.fSFTable)); + assertTrue("Safe Reverse Table", Arrays.equals(bi.fRData.fSRTable, bi2.fRData.fSRTable)); + } + } }