ICU-13569 rbbi table, remove duplicated states, working for C++.
X-SVN-Rev: 40902
This commit is contained in:
parent
1036ed52e3
commit
acae049ee1
@ -358,7 +358,7 @@ void RBBIRuleBuilder::optimizeTables() {
|
||||
int32_t rightClass;
|
||||
|
||||
leftClass = 3;
|
||||
rightClass = 4;
|
||||
rightClass = 0;
|
||||
while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
|
||||
fSetBuilder->mergeCategories(leftClass, rightClass);
|
||||
fForwardTables->removeColumn(rightClass);
|
||||
@ -368,6 +368,9 @@ void RBBIRuleBuilder::optimizeTables() {
|
||||
}
|
||||
|
||||
fForwardTables->removeDuplicateStates();
|
||||
fReverseTables->removeDuplicateStates();
|
||||
fSafeFwdTables->removeDuplicateStates();
|
||||
fSafeRevTables->removeDuplicateStates();
|
||||
|
||||
|
||||
|
||||
|
@ -762,7 +762,7 @@ void RBBITableBuilder::flagAcceptingStates() {
|
||||
// if sd->fAccepting already had a value other than 0 or -1, leave it be.
|
||||
|
||||
// If the end marker node is from a look-ahead rule, set
|
||||
// the fLookAhead field or this state also.
|
||||
// the fLookAhead field for this state also.
|
||||
if (endMarker->fLookAheadEnd) {
|
||||
// TODO: don't change value if already set?
|
||||
// TODO: allow for more than one active look-ahead rule in engine.
|
||||
@ -1085,8 +1085,6 @@ bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &dup
|
||||
int32_t numStates = fDStates->size();
|
||||
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
|
||||
|
||||
U_ASSERT(baseCategory < duplCategory);
|
||||
|
||||
uint16_t table_base;
|
||||
uint16_t table_dupl;
|
||||
for (; baseCategory < numCols-1; ++baseCategory) {
|
||||
@ -1171,12 +1169,22 @@ void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
|
||||
int32_t existingVal = sd->fDtran->elementAti(col);
|
||||
int32_t newVal = existingVal;
|
||||
if (existingVal == duplState) {
|
||||
existingVal = keepState;
|
||||
newVal = keepState;
|
||||
} else if (existingVal > duplState) {
|
||||
newVal = existingVal - 1;
|
||||
}
|
||||
sd->fDtran->setElementAt(newVal, col);
|
||||
}
|
||||
if (sd->fAccepting == duplState) {
|
||||
sd->fAccepting = keepState;
|
||||
} else if (sd->fAccepting > duplState) {
|
||||
sd->fAccepting--;
|
||||
}
|
||||
if (sd->fLookAhead == duplState) {
|
||||
sd->fLookAhead = keepState;
|
||||
} else if (sd->fLookAhead > duplState) {
|
||||
sd->fLookAhead--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1185,13 +1193,12 @@ void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
|
||||
* RemoveDuplicateStates
|
||||
*/
|
||||
void RBBITableBuilder::removeDuplicateStates() {
|
||||
int32_t firstState = 0;
|
||||
int32_t firstState = 3;
|
||||
int32_t duplicateState = 0;
|
||||
while (findDuplicateState(firstState, duplicateState)) {
|
||||
printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
|
||||
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
|
||||
removeState(firstState, duplicateState);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
@ -4462,32 +4462,17 @@ void RBBITest::TestBug12677() {
|
||||
void RBBITest::TestTableRedundancies() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
UnicodeString rules {u"$s0=[;,*]; \n"
|
||||
"$s1=[a-z]; \n"
|
||||
"$s2=[i-n]; \n"
|
||||
"$s3=[x-z]; \n"
|
||||
"!!forward; \n"
|
||||
"($s0 | '?')*; \n"
|
||||
"($s1 | $s2 | $s3)*; \n" };
|
||||
|
||||
RuleBasedBreakIterator *lbi =
|
||||
(RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
|
||||
//lbi->dumpTables();
|
||||
UnicodeString lbRules = lbi->getRules();
|
||||
delete lbi;
|
||||
|
||||
UParseError pe {};
|
||||
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(lbRules, pe, status);
|
||||
LocalPointer<RuleBasedBreakIterator> bi (
|
||||
(RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status));
|
||||
assertSuccess(WHERE, status);
|
||||
if (U_FAILURE(status)) return;
|
||||
bi->dumpTables();
|
||||
|
||||
RBBIDataWrapper *dw = bi->fData;
|
||||
const RBBIStateTable *fwtbl = dw->fForwardTable;
|
||||
int32_t numCharClasses = dw->fHeader->fCatCount;
|
||||
printf("Char Classes: %d states: %d\n", numCharClasses, fwtbl->fNumStates);
|
||||
// printf("Char Classes: %d states: %d\n", numCharClasses, fwtbl->fNumStates);
|
||||
|
||||
// Check for duplicate columns
|
||||
// Check for duplicate columns (character categories)
|
||||
|
||||
std::vector<UnicodeString> columns;
|
||||
for (int32_t column = 0; column < numCharClasses; column++) {
|
||||
@ -4498,23 +4483,23 @@ void RBBITest::TestTableRedundancies() {
|
||||
}
|
||||
columns.push_back(s);
|
||||
}
|
||||
for (int c1=0; c1<numCharClasses; c1++) {
|
||||
// Ignore column (char class) 0 while checking; it's special, and may have duplicates.
|
||||
for (int c1=1; c1<numCharClasses; c1++) {
|
||||
for (int c2 = c1+1; c2 < numCharClasses; c2++) {
|
||||
if (columns.at(c1) == columns.at(c2)) {
|
||||
printf("Duplicate columns (%d, %d)\n", c1, c2);
|
||||
break;
|
||||
errln("%s:%d Duplicate columns (%d, %d)\n", __FILE__, __LINE__, c1, c2);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
out:
|
||||
|
||||
// Check for duplicate states
|
||||
std::vector<UnicodeString> rows;
|
||||
for (int32_t r=0; r < (int32_t)fwtbl->fNumStates; r++) {
|
||||
UnicodeString s;
|
||||
RBBIStateTableRow *row = (RBBIStateTableRow *) (fwtbl->fTableData + (fwtbl->fRowLen * r));
|
||||
if (row->fAccepting < -1) {
|
||||
printf("row %d accepting = %d\n", r, row->fAccepting);
|
||||
}
|
||||
assertTrue(WHERE, row->fAccepting >= -1);
|
||||
s.append(row->fAccepting + 1); // values of -1 are expected.
|
||||
s.append(row->fLookAhead);
|
||||
s.append(row->fTagIdx);
|
||||
@ -4523,15 +4508,14 @@ void RBBITest::TestTableRedundancies() {
|
||||
}
|
||||
rows.push_back(s);
|
||||
}
|
||||
for (int r1=0; r1<(int32_t)fwtbl->fNumStates; r1++) {
|
||||
for (int r1=0; r1 < (int32_t)fwtbl->fNumStates; r1++) {
|
||||
for (int r2 = r1+1; r2 < (int32_t)fwtbl->fNumStates; r2++) {
|
||||
if (rows.at(r1) == rows.at(r2)) {
|
||||
printf("Duplicate rows (%d, %d)\n", r1, r2);
|
||||
break;
|
||||
errln("%s:%d Duplicate rows (%d, %d)\n", __FILE__, __LINE__, r1, r2);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
delete bi;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user