scuffed-code/icu4c/source/i18n/dbbi_bld.cpp

65 lines
2.5 KiB
C++
Raw Normal View History

/**
* The Builder class for DictionaryBasedBreakIterator inherits almost all of
* its functionality from the Builder class for RuleBasedBreakIterator, but
* extends it with extra logic to handle the "<dictionary>" token
*/
protected class Builder extends RuleBasedBreakIterator.Builder {
/**
* A CharSet that contains all the characters represented in the dictionary
*/
private CharSet dictionaryChars = new CharSet();
private String dictionaryExpression = "";
/**
* No special initialization
*/
public Builder() {
}
/**
* We override handleSpecialSubstitution() to add logic to handle
* the <dictionary> tag. If we see a substitution named "<dictionary>",
* parse the substitution expression and store the result in
* dictionaryChars.
*/
protected void handleSpecialSubstitution(String replace, String replaceWith,
int startPos, String description) {
super.handleSpecialSubstitution(replace, replaceWith, startPos, description);
if (replace.equals("<dictionary>")) {
if (replaceWith.charAt(0) == '(') {
error("Dictionary group can't be enclosed in (", startPos, description);
}
dictionaryExpression = replaceWith;
dictionaryChars = CharSet.parseString(replaceWith);
}
}
/**
* The other half of the logic to handle the dictionary characters happens here.
* After the inherited builder has derived the real character categories, we
* set up the categoryFlags array in the iterator. This array contains "true"
* for every character category that includes a dictionary character.
*/
protected void buildCharCategories(Vector tempRuleList) {
super.buildCharCategories(tempRuleList);
categoryFlags = new boolean[categories.size()];
for (int i = 0; i < categories.size(); i++) {
CharSet cs = (CharSet)categories.elementAt(i);
if (!(cs.intersection(dictionaryChars).empty())) {
categoryFlags[i] = TRUE;
}
}
}
// This function is actually called by RuleBasedBreakIterator.buildCharCategories(),
// which is called by the function above. This gives us a way to create a separate
// character category for the dictionary characters even when RuleBasedBreakIterator
// isn't making a distinction
protected void mungeExpressionList(Hashtable expressions) {
expressions.put(dictionaryExpression, dictionaryChars);
}
}