ICU-13569 rbbi char class merging now working, tests passing.

X-SVN-Rev: 40881
This commit is contained in:
Andy Heninger 2018-02-09 18:53:28 +00:00
parent 4959b9b3a3
commit 0e3d103630
4 changed files with 22 additions and 18 deletions

View File

@@ -359,10 +359,7 @@ void RBBIRuleBuilder::optimizeTables() {
leftClass = 3;
rightClass = 4;
printf("Optimizing tables ...\n");
while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
printf("Merging duplicate columns (%d, %d)\n", leftClass, rightClass);
fSetBuilder->mergeCategories(leftClass, rightClass);
fForwardTables->removeColumn(rightClass);
fReverseTables->removeColumn(rightClass);

View File

@@ -274,9 +274,11 @@ void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) {
U_ASSERT(left >= 1);
U_ASSERT(right > left);
for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
if (rd->fNum == right) {
rd->fNum = left;
} else if (rd->fNum > right) {
int32_t rangeNum = rd->fNum & ~DICT_BIT;
int32_t rangeDict = rd->fNum & DICT_BIT;
if (rangeNum == right) {
rd->fNum = left | rangeDict;
} else if (rangeNum > right) {
rd->fNum--;
}
}
@@ -465,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() {
lastPrintedGroupNum = groupNum;
RBBIDebugPrintf("%2i ", groupNum);
if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" <DICT> ");}
for (i=0; i<rlRange->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
@@ -658,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
void RangeDescriptor::setDictionaryFlag() {
int i;
for (i=0; i<this->fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
UnicodeString setName;
RBBINode *setRef = usetNode->fParent;
if (setRef != NULL) {
static const char16_t *dictionary = u"dictionary";
for (i=0; i<fIncludesSets->size(); i++) {
RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i);
RBBINode *setRef = usetNode->fParent;
if (setRef != nullptr) {
RBBINode *varRef = setRef->fParent;
if (varRef != NULL && varRef->fType == RBBINode::varRef) {
setName = varRef->fText;
if (varRef && varRef->fType == RBBINode::varRef) {
const UnicodeString *setName = &varRef->fText;
if (setName->compare(dictionary, -1) == 0) {
fNum |= RBBISetBuilder::DICT_BIT;
break;
}
}
}
if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals.
this->fNum |= 0x4000;
break;
}
}
}

View File

@@ -99,6 +99,8 @@ public:
*/
void mergeCategories(int32_t left, int32_t right);
static constexpr int32_t DICT_BIT = 0x4000;
#ifdef RBBI_DEBUG
void printSets();
void printRanges();

View File

@@ -4471,7 +4471,10 @@ void RBBITest::TestTableRedundancies() {
"($s1 | $s2 | $s3)*; \n" };
RuleBasedBreakIterator *lbi =
(RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
//lbi->dumpTables();
rules = lbi->getRules();
delete lbi;
UParseError pe {};
RuleBasedBreakIterator *bi =
// (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);