diff --git a/icu4c/source/common/rbbirb.cpp b/icu4c/source/common/rbbirb.cpp index 3becda3127..a0f92da619 100644 --- a/icu4c/source/common/rbbirb.cpp +++ b/icu4c/source/common/rbbirb.cpp @@ -359,10 +359,7 @@ void RBBIRuleBuilder::optimizeTables() { leftClass = 3; rightClass = 4; - printf("Optimizing tables ...\n"); while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) { - printf("Merging duplicate columns (%d, %d)\n", leftClass, rightClass); - fSetBuilder->mergeCategories(leftClass, rightClass); fForwardTables->removeColumn(rightClass); fReverseTables->removeColumn(rightClass); diff --git a/icu4c/source/common/rbbisetb.cpp b/icu4c/source/common/rbbisetb.cpp index 67bb460aca..108d127d45 100644 --- a/icu4c/source/common/rbbisetb.cpp +++ b/icu4c/source/common/rbbisetb.cpp @@ -274,9 +274,11 @@ void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) { U_ASSERT(left >= 1); U_ASSERT(right > left); for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) { - if (rd->fNum == right) { - rd->fNum = left; - } else if (rd->fNum > right) { + int32_t rangeNum = rd->fNum & ~DICT_BIT; + int32_t rangeDict = rd->fNum & DICT_BIT; + if (rangeNum == right) { + rd->fNum = left | rangeDict; + } else if (rangeNum > right) { rd->fNum--; } } @@ -465,7 +467,7 @@ void RBBISetBuilder::printRangeGroups() { lastPrintedGroupNum = groupNum; RBBIDebugPrintf("%2i ", groupNum); - if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" ");} + if (rlRange->fNum & DICT_BIT) { RBBIDebugPrintf(" ");} for (i=0; i<rlRange->fIncludesSets->size(); i++) { RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i); @@ -658,20 +660,20 @@ void RangeDescriptor::split(UChar32 where, UErrorCode &status) { void RangeDescriptor::setDictionaryFlag() { int i; - for (i=0; i<this->fIncludesSets->size(); i++) { - RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); - UnicodeString setName; - RBBINode *setRef = usetNode->fParent; - if (setRef != NULL) { + static const char16_t *dictionary = 
u"dictionary"; + for (i=0; i<fIncludesSets->size(); i++) { + RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); + RBBINode *setRef = usetNode->fParent; + if (setRef != nullptr) { RBBINode *varRef = setRef->fParent; - if (varRef != NULL && varRef->fType == RBBINode::varRef) { - setName = varRef->fText; + if (varRef && varRef->fType == RBBINode::varRef) { + const UnicodeString *setName = &varRef->fText; + if (setName->compare(dictionary, -1) == 0) { + fNum |= RBBISetBuilder::DICT_BIT; + break; + } } } - if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals. - this->fNum |= 0x4000; - break; - } } } diff --git a/icu4c/source/common/rbbisetb.h b/icu4c/source/common/rbbisetb.h index 3f0ec1a8a0..a7a91b3b37 100644 --- a/icu4c/source/common/rbbisetb.h +++ b/icu4c/source/common/rbbisetb.h @@ -99,6 +99,8 @@ public: */ void mergeCategories(int32_t left, int32_t right); + static constexpr int32_t DICT_BIT = 0x4000; + #ifdef RBBI_DEBUG void printSets(); void printRanges(); diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index c6e0f457fb..b28723f456 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -4471,7 +4471,10 @@ void RBBITest::TestTableRedundancies() { "($s1 | $s2 | $s3)*; \n" }; RuleBasedBreakIterator *lbi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); + //lbi->dumpTables(); rules = lbi->getRules(); + delete lbi; + UParseError pe {}; RuleBasedBreakIterator *bi = // (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);