ICU-13194 RBBI safe tables, improve code consistency between C++ and Java.
X-SVN-Rev: 41203
This commit is contained in:
parent
b6a2b3fddb
commit
7adb68f6f8
@ -303,16 +303,14 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
|
||||
}
|
||||
|
||||
void RBBIRuleBuilder::optimizeTables() {
|
||||
int32_t leftClass;
|
||||
int32_t rightClass;
|
||||
|
||||
leftClass = 3;
|
||||
rightClass = 0;
|
||||
|
||||
// Begin looking for duplicates with char class 3.
|
||||
// Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
|
||||
// and should not have other categories merged into them.
|
||||
IntPair duplPair = {3, 0};
|
||||
|
||||
while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
|
||||
fSetBuilder->mergeCategories(duplPair.first, duplPair.second);
|
||||
fSetBuilder->mergeCategories(duplPair);
|
||||
fForwardTable->removeColumn(duplPair.second);
|
||||
}
|
||||
fForwardTable->removeDuplicateStates();
|
||||
|
@ -270,15 +270,15 @@ void RBBISetBuilder::buildTrie() {
|
||||
}
|
||||
|
||||
|
||||
void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) {
|
||||
U_ASSERT(left >= 1);
|
||||
U_ASSERT(right > left);
|
||||
void RBBISetBuilder::mergeCategories(IntPair categories) {
|
||||
U_ASSERT(categories.first >= 1);
|
||||
U_ASSERT(categories.second > categories.first);
|
||||
for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) {
|
||||
int32_t rangeNum = rd->fNum & ~DICT_BIT;
|
||||
int32_t rangeDict = rd->fNum & DICT_BIT;
|
||||
if (rangeNum == right) {
|
||||
rd->fNum = left | rangeDict;
|
||||
} else if (rangeNum > right) {
|
||||
if (rangeNum == categories.second) {
|
||||
rd->fNum = categories.first | rangeDict;
|
||||
} else if (rangeNum > categories.second) {
|
||||
rd->fNum--;
|
||||
}
|
||||
}
|
||||
|
@ -94,10 +94,12 @@ public:
|
||||
UChar32 getFirstChar(int32_t val) const;
|
||||
UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo
|
||||
// character were encountered.
|
||||
/** merge two character categories that have been identified as having equivalent behavior.
|
||||
* The ranges belonging to the right category (table column) will be added to the left.
|
||||
/**
|
||||
* Merge two character categories that have been identified as having equivalent behavior.
|
||||
* The ranges belonging to the second category (table column) will be added to the first.
|
||||
* @param categories the pair of categories to be merged.
|
||||
*/
|
||||
void mergeCategories(int32_t left, int32_t right);
|
||||
void mergeCategories(IntPair categories);
|
||||
|
||||
static constexpr int32_t DICT_BIT = 0x4000;
|
||||
|
||||
|
@ -1179,7 +1179,9 @@ bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
|
||||
}
|
||||
|
||||
|
||||
void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
|
||||
void RBBITableBuilder::removeState(IntPair duplStates) {
|
||||
const int32_t keepState = duplStates.first;
|
||||
const int32_t duplState = duplStates.second;
|
||||
U_ASSERT(keepState < duplState);
|
||||
U_ASSERT(duplState < fDStates->size());
|
||||
|
||||
@ -1214,7 +1216,9 @@ void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) {
|
||||
}
|
||||
}
|
||||
|
||||
void RBBITableBuilder::removeSafeState(int32_t keepState, int32_t duplState) {
|
||||
void RBBITableBuilder::removeSafeState(IntPair duplStates) {
|
||||
const int32_t keepState = duplStates.first;
|
||||
const int32_t duplState = duplStates.second;
|
||||
U_ASSERT(keepState < duplState);
|
||||
U_ASSERT(duplState < fSafeTable->size());
|
||||
|
||||
@ -1245,7 +1249,7 @@ void RBBITableBuilder::removeDuplicateStates() {
|
||||
IntPair dupls = {3, 0};
|
||||
while (findDuplicateState(&dupls)) {
|
||||
// printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
|
||||
removeState(dupls.first, dupls.second);
|
||||
removeState(dupls);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1430,7 +1434,7 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
|
||||
IntPair states = {1, 0};
|
||||
while (findDuplicateSafeState(&states)) {
|
||||
// printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
|
||||
removeSafeState(states.first, states.second);
|
||||
removeSafeState(states);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -51,14 +51,15 @@ public:
|
||||
void exportTable(void *where);
|
||||
|
||||
/**
|
||||
* Find duplicate (redundant) character classes, beginning at the specified
|
||||
* pair, within this state table. This is an iterator-like function, used to
|
||||
* identify character classes (state table columns) that can be eliminated.
|
||||
* Find duplicate (redundant) character classes. Begin looking with categories.first.
|
||||
* Duplicates, if found, are returned in the categories parameter.
|
||||
* This is an iterator-like function, used to identify character classes
|
||||
* (state table columns) that can be eliminated.
|
||||
* @param categories in/out parameter, specifies where to start looking for duplicates,
|
||||
* and returns the first pair of duplicates found, if any.
|
||||
* @return true if duplicate char classes were found, false otherwise.
|
||||
*/
|
||||
bool findDuplCharClassFrom(IntPair *statePair);
|
||||
bool findDuplCharClassFrom(IntPair *categories);
|
||||
|
||||
/** Remove a column from the state table. Used when two character categories
|
||||
* have been found equivalent, and merged together, to eliminate the unneeded table column.
|
||||
@ -110,12 +111,12 @@ private:
|
||||
*/
|
||||
bool findDuplicateState(IntPair *states);
|
||||
|
||||
/** Remove a duplicate state/
|
||||
* @param keepState First of the duplicate pair. Keep it.
|
||||
* @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state
|
||||
* to refer to keepState instead.
|
||||
/** Remove a duplicate state.
|
||||
* @param duplStates The duplicate states. The first is kept, the second is removed.
|
||||
* All references to the second in the state table are retargeted
|
||||
* to the first.
|
||||
*/
|
||||
void removeState(int32_t keepState, int32_t duplState);
|
||||
void removeState(IntPair duplStates);
|
||||
|
||||
/** Find the next duplicate state in the safe reverse table. An iterator function.
|
||||
* @param states in/out parameter, specifies where to start looking for duplicates,
|
||||
@ -125,11 +126,11 @@ private:
|
||||
bool findDuplicateSafeState(IntPair *states);
|
||||
|
||||
/** Remove a duplicate state from the safe table.
|
||||
* @param keepState First of the duplicate pair. Keep it.
|
||||
* @param duplState Duplicate state. Remove it. Redirect all table references to the duplicate state
|
||||
* to refer to keepState instead.
|
||||
* @param duplStates The duplicate states. The first is kept, the second is removed.
|
||||
* All references to the second in the state table are retargeted
|
||||
* to the first.
|
||||
*/
|
||||
void removeSafeState(int32_t keepState, int32_t duplState);
|
||||
void removeSafeState(IntPair duplStates);
|
||||
|
||||
// Set functions for UVector.
|
||||
// TODO: make a USet subclass of UVector
|
||||
|
@ -70,13 +70,6 @@ public:
|
||||
RBBIDataWrapper *fData;
|
||||
private:
|
||||
|
||||
/**
|
||||
* The iteration state - current position, rule status for the current position,
|
||||
* and whether the iterator ran off the end, yielding UBRK_DONE.
|
||||
* Current position is pinned to be 0 < position <= text.length.
|
||||
* Current position is always set to a boundary.
|
||||
* @internal
|
||||
*/
|
||||
/**
|
||||
* The current position of the iterator. Pinned, 0 < fPosition <= text.length.
|
||||
* Never has the value UBRK_DONE (-1).
|
||||
|
Loading…
Reference in New Issue
Block a user