ICU-13569 rbbi table, remove duplicated states, working for C++.

X-SVN-Rev: 40902
This commit is contained in:
Andy Heninger 2018-02-13 01:08:29 +00:00
parent 1036ed52e3
commit acae049ee1
3 changed files with 31 additions and 37 deletions

View File

@ -358,7 +358,7 @@ void RBBIRuleBuilder::optimizeTables() {
int32_t rightClass;
leftClass = 3;
rightClass = 4;
rightClass = 0;
while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
fSetBuilder->mergeCategories(leftClass, rightClass);
fForwardTables->removeColumn(rightClass);
@ -368,6 +368,9 @@ void RBBIRuleBuilder::optimizeTables() {
}
fForwardTables->removeDuplicateStates();
fReverseTables->removeDuplicateStates();
fSafeFwdTables->removeDuplicateStates();
fSafeRevTables->removeDuplicateStates();

View File

@ -762,7 +762,7 @@ void RBBITableBuilder::flagAcceptingStates() {
// if sd->fAccepting already had a value other than 0 or -1, leave it be.
// If the end marker node is from a look-ahead rule, set
// the fLookAhead field or this state also.
// the fLookAhead field for this state also.
if (endMarker->fLookAheadEnd) {
// TODO: don't change value if already set?
// TODO: allow for more than one active look-ahead rule in engine.
@ -1085,8 +1085,6 @@ bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &dup
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
U_ASSERT(baseCategory < duplCategory);
uint16_t table_base;
uint16_t table_dupl;
for (; baseCategory < numCols-1; ++baseCategory) {
@ -1171,12 +1169,22 @@ void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
int32_t existingVal = sd->fDtran->elementAti(col);
int32_t newVal = existingVal;
if (existingVal == duplState) {
existingVal = keepState;
newVal = keepState;
} else if (existingVal > duplState) {
newVal = existingVal - 1;
}
sd->fDtran->setElementAt(newVal, col);
}
if (sd->fAccepting == duplState) {
sd->fAccepting = keepState;
} else if (sd->fAccepting > duplState) {
sd->fAccepting--;
}
if (sd->fLookAhead == duplState) {
sd->fLookAhead = keepState;
} else if (sd->fLookAhead > duplState) {
sd->fLookAhead--;
}
}
}
@ -1185,13 +1193,12 @@ void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
* RemoveDuplicateStates
*/
void RBBITableBuilder::removeDuplicateStates() {
int32_t firstState = 0;
int32_t firstState = 3;
int32_t duplicateState = 0;
while (findDuplicateState(firstState, duplicateState)) {
printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
removeState(firstState, duplicateState);
}
}
//-----------------------------------------------------------------------------

View File

@ -4462,32 +4462,17 @@ void RBBITest::TestBug12677() {
void RBBITest::TestTableRedundancies() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString rules {u"$s0=[;,*]; \n"
"$s1=[a-z]; \n"
"$s2=[i-n]; \n"
"$s3=[x-z]; \n"
"!!forward; \n"
"($s0 | '?')*; \n"
"($s1 | $s2 | $s3)*; \n" };
RuleBasedBreakIterator *lbi =
(RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
//lbi->dumpTables();
UnicodeString lbRules = lbi->getRules();
delete lbi;
UParseError pe {};
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(lbRules, pe, status);
LocalPointer<RuleBasedBreakIterator> bi (
(RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status));
assertSuccess(WHERE, status);
if (U_FAILURE(status)) return;
bi->dumpTables();
RBBIDataWrapper *dw = bi->fData;
const RBBIStateTable *fwtbl = dw->fForwardTable;
int32_t numCharClasses = dw->fHeader->fCatCount;
printf("Char Classes: %d states: %d\n", numCharClasses, fwtbl->fNumStates);
// printf("Char Classes: %d states: %d\n", numCharClasses, fwtbl->fNumStates);
// Check for duplicate columns
// Check for duplicate columns (character categories)
std::vector<UnicodeString> columns;
for (int32_t column = 0; column < numCharClasses; column++) {
@ -4498,23 +4483,23 @@ void RBBITest::TestTableRedundancies() {
}
columns.push_back(s);
}
for (int c1=0; c1<numCharClasses; c1++) {
// Ignore column (char class) 0 while checking; it's special, and may have duplicates.
for (int c1=1; c1<numCharClasses; c1++) {
for (int c2 = c1+1; c2 < numCharClasses; c2++) {
if (columns.at(c1) == columns.at(c2)) {
printf("Duplicate columns (%d, %d)\n", c1, c2);
break;
errln("%s:%d Duplicate columns (%d, %d)\n", __FILE__, __LINE__, c1, c2);
goto out;
}
}
}
out:
// Check for duplicate states
std::vector<UnicodeString> rows;
for (int32_t r=0; r < (int32_t)fwtbl->fNumStates; r++) {
UnicodeString s;
RBBIStateTableRow *row = (RBBIStateTableRow *) (fwtbl->fTableData + (fwtbl->fRowLen * r));
if (row->fAccepting < -1) {
printf("row %d accepting = %d\n", r, row->fAccepting);
}
assertTrue(WHERE, row->fAccepting >= -1);
s.append(row->fAccepting + 1); // values of -1 are expected.
s.append(row->fLookAhead);
s.append(row->fTagIdx);
@ -4523,15 +4508,14 @@ void RBBITest::TestTableRedundancies() {
}
rows.push_back(s);
}
for (int r1=0; r1<(int32_t)fwtbl->fNumStates; r1++) {
for (int r1=0; r1 < (int32_t)fwtbl->fNumStates; r1++) {
for (int r2 = r1+1; r2 < (int32_t)fwtbl->fNumStates; r2++) {
if (rows.at(r1) == rows.at(r2)) {
printf("Duplicate rows (%d, %d)\n", r1, r2);
break;
errln("%s:%d Duplicate rows (%d, %d)\n", __FILE__, __LINE__, r1, r2);
return;
}
}
}
delete bi;
}