ICU-1053 RBT no longer needs to pass a filter down the chain
X-SVN-Rev: 5261
This commit is contained in:
parent
5ded1a9c58
commit
b2d51635d3
@ -12,7 +12,10 @@
|
||||
#include "unicode/unifltlg.h"
|
||||
#include "uvector.h"
|
||||
|
||||
static const UChar NEWLINE = 10;
|
||||
// keep in sync with Transliterator
|
||||
static const UChar ID_SEP = 0x002D; /*-*/
|
||||
static const UChar ID_DELIM = 0x003B; /*;*/
|
||||
static const UChar NEWLINE = 10;
|
||||
|
||||
/**
|
||||
* Constructs a new compound transliterator given an array of
|
||||
|
@ -125,10 +125,8 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
|
||||
|
||||
while (index.start < index.limit && loopCount <= loopLimit) {
|
||||
TransliterationRule* r = isIncremental ?
|
||||
data->ruleSet.findIncrementalMatch(text, index, *data, isPartial,
|
||||
getFilter()) :
|
||||
data->ruleSet.findMatch(text, index, *data,
|
||||
getFilter());
|
||||
data->ruleSet.findIncrementalMatch(text, index, *data, isPartial) :
|
||||
data->ruleSet.findMatch(text, index, *data);
|
||||
|
||||
/* If we match a rule then apply it by replacing the key
|
||||
* with the rule output and repositioning the cursor
|
||||
|
@ -355,15 +355,10 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
|
||||
* @param cursor position at which to translate next, representing offset
|
||||
* into text. This value must be between <code>start</code> and
|
||||
* <code>limit</code>.
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
UBool TransliterationRule::matches(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
const TransliterationRuleData& data) const {
|
||||
// Match anteContext, key, and postContext
|
||||
int32_t cursor = pos.start - anteContextLength;
|
||||
// Quick length check; this is a performance win for long rules.
|
||||
@ -374,7 +369,7 @@ UBool TransliterationRule::matches(const Replaceable& text,
|
||||
}
|
||||
for (int32_t i=0; i<pattern.length(); ++i, ++cursor) {
|
||||
if (!charMatches(pattern.charAt(i), text, cursor, pos,
|
||||
data, filter)) {
|
||||
data)) {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
@ -396,10 +391,6 @@ UBool TransliterationRule::matches(const Replaceable& text,
|
||||
* @param cursor position at which to translate next, representing offset
|
||||
* into text. This value must be between <code>start</code> and
|
||||
* <code>limit</code>.
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
* @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
|
||||
* <code>FULL_MATCH</code>.
|
||||
* @see #MISMATCH
|
||||
@ -408,9 +399,8 @@ UBool TransliterationRule::matches(const Replaceable& text,
|
||||
*/
|
||||
int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
int len = getRegionMatchLength(text, pos, data, filter);
|
||||
const TransliterationRuleData& data) const {
|
||||
int len = getRegionMatchLength(text, pos, data);
|
||||
return len < anteContextLength ? MISMATCH :
|
||||
(len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
|
||||
}
|
||||
@ -429,18 +419,13 @@ int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
|
||||
* <code>limit</code>.
|
||||
* @param data a dictionary of variables mapping <code>Character</code>
|
||||
* to <code>UnicodeSet</code>
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
* @return -1 if there is a mismatch, 0 if the text is not long enough to
|
||||
* match any characters, otherwise the number of characters of text that
|
||||
* match this rule.
|
||||
*/
|
||||
int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
const TransliterationRuleData& data) const {
|
||||
int32_t cursor = pos.start - anteContextLength;
|
||||
// Quick length check; this is a performance win for long rules.
|
||||
// Widen by one to allow anchor matching.
|
||||
@ -450,7 +435,7 @@ int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
|
||||
int32_t i;
|
||||
for (i=0; i<pattern.length() && cursor<pos.contextLimit; ++i, ++cursor) {
|
||||
if (!charMatches(pattern.charAt(i), text, cursor, pos,
|
||||
data, filter)) {
|
||||
data)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@ -466,22 +451,16 @@ int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
|
||||
* @param textChar a character in the text being transliterated
|
||||
* @param data a dictionary of variables mapping <code>Character</code>
|
||||
* to <code>UnicodeSet</code>
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
UBool TransliterationRule::charMatches(UChar keyChar, const Replaceable& text,
|
||||
int32_t index,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
const TransliterationRuleData& data) const {
|
||||
const UnicodeSet* set = 0;
|
||||
UChar textChar = (index >= pos.contextStart && index < pos.contextLimit)
|
||||
? text.charAt(index) : ETHER;
|
||||
return (filter == 0 || filter->contains(textChar)) &&
|
||||
(((set = data.lookupSet(keyChar)) == 0) ?
|
||||
keyChar == textChar : set->contains(textChar));
|
||||
return ((set = data.lookupSet(keyChar)) == 0) ?
|
||||
keyChar == textChar : set->contains(textChar);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -13,7 +13,6 @@
|
||||
|
||||
class Replaceable;
|
||||
class TransliterationRuleData;
|
||||
class UnicodeFilter;
|
||||
|
||||
/**
|
||||
* A transliteration rule used by
|
||||
@ -274,15 +273,10 @@ public:
|
||||
* @param cursor position at which to translate next, representing offset
|
||||
* into text. This value must be between <code>start</code> and
|
||||
* <code>limit</code>.
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
virtual UBool matches(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
const TransliterationRuleData& data) const;
|
||||
|
||||
/**
|
||||
* Return the degree of match between this rule and the given text. The
|
||||
@ -299,10 +293,6 @@ public:
|
||||
* @param cursor position at which to translate next, representing offset
|
||||
* into text. This value must be between <code>start</code> and
|
||||
* <code>limit</code>.
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
* @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
|
||||
* <code>FULL_MATCH</code>.
|
||||
* @see #MISMATCH
|
||||
@ -311,8 +301,7 @@ public:
|
||||
*/
|
||||
virtual int32_t getMatchDegree(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
const TransliterationRuleData& data) const;
|
||||
|
||||
/**
|
||||
* Return the number of characters of the text that match this rule. If
|
||||
@ -328,18 +317,13 @@ public:
|
||||
* <code>limit</code>.
|
||||
* @param data a dictionary of variables mapping <code>Character</code>
|
||||
* to <code>UnicodeSet</code>
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
* @return -1 if there is a mismatch, 0 if the text is not long enough to
|
||||
* match any characters, otherwise the number of characters of text that
|
||||
* match this rule.
|
||||
*/
|
||||
virtual int32_t getRegionMatchLength(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
const TransliterationRuleData& data) const;
|
||||
|
||||
/**
|
||||
* Return true if the given key matches the given text. This method
|
||||
@ -350,16 +334,11 @@ public:
|
||||
* @param textChar a character in the text being transliterated
|
||||
* @param data a dictionary of variables mapping <code>Character</code>
|
||||
* to <code>UnicodeSet</code>
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
virtual UBool charMatches(UChar keyChar, const Replaceable& textChar,
|
||||
int32_t index,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
const TransliterationRuleData& data) const;
|
||||
|
||||
/**
|
||||
* Create a rule string that represents this rule object. Append
|
||||
|
@ -203,23 +203,18 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
|
||||
* <code>limit</code>.
|
||||
* @param data a dictionary mapping variables to the sets they
|
||||
* represent (maps <code>Character</code> to <code>UnicodeSet</code>)
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
* @return the matching rule, or null if none found.
|
||||
*/
|
||||
TransliterationRule*
|
||||
TransliterationRuleSet::findMatch(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const {
|
||||
const TransliterationRuleData& data) const {
|
||||
/* We only need to check our indexed bin of the rule table,
|
||||
* based on the low byte of the first key character.
|
||||
*/
|
||||
int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF);
|
||||
for (int32_t i=index[x]; i<index[x+1]; ++i) {
|
||||
if (rules[i]->matches(text, pos, data, filter)) {
|
||||
if (rules[i]->matches(text, pos, data)) {
|
||||
return rules[i];
|
||||
}
|
||||
}
|
||||
@ -246,10 +241,6 @@ TransliterationRuleSet::findMatch(const Replaceable& text,
|
||||
* represent (maps <code>Character</code> to <code>UnicodeSet</code>)
|
||||
* @param isPartial output parameter. <code>isPartial</code> is set to
|
||||
* true if a partial match is returned.
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
* @return the matching rule, or null if none found, or if the text buffer
|
||||
* does not have enough text yet to unambiguously match a rule.
|
||||
*/
|
||||
@ -257,8 +248,7 @@ TransliterationRule*
|
||||
TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
UBool& isPartial,
|
||||
const UnicodeFilter* filter) const {
|
||||
UBool& isPartial) const {
|
||||
|
||||
/* We only need to check our indexed bin of the rule table,
|
||||
* based on the low byte of the first key character.
|
||||
@ -266,7 +256,7 @@ TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
|
||||
isPartial = FALSE;
|
||||
int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF);
|
||||
for (int32_t i=index[x]; i<index[x+1]; ++i) {
|
||||
int32_t match = rules[i]->getMatchDegree(text, pos, data, filter);
|
||||
int32_t match = rules[i]->getMatchDegree(text, pos, data);
|
||||
switch (match) {
|
||||
case TransliterationRule::FULL_MATCH:
|
||||
return rules[i];
|
||||
|
@ -113,16 +113,12 @@ public:
|
||||
* <code>limit</code>.
|
||||
* @param data a dictionary mapping variables to the sets they
|
||||
* represent (maps <code>Character</code> to <code>UnicodeSet</code>)
|
||||
* @param filter the filter. Any character for which
|
||||
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
* @return the matching rule, or null if none found.
|
||||
*/
|
||||
virtual TransliterationRule* findMatch(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
const UnicodeFilter* filter) const;
|
||||
const TransliterationRuleData& data) const;
|
||||
|
||||
/**
|
||||
* Attempt to find a matching rule at the specified point in the text.
|
||||
@ -154,8 +150,7 @@ public:
|
||||
virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,
|
||||
const UTransPosition& pos,
|
||||
const TransliterationRuleData& data,
|
||||
UBool& isPartial,
|
||||
const UnicodeFilter* filter) const;
|
||||
UBool& isPartial) const;
|
||||
|
||||
/**
|
||||
* Create rule strings that represent this rule set.
|
||||
|
@ -36,9 +36,9 @@
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/unitohex.h"
|
||||
|
||||
const UChar Transliterator::ID_SEP = 0x002D; /*-*/
|
||||
const UChar Transliterator::ID_DELIM = 0x003B; /*;*/
|
||||
|
||||
// keep in sync with CompoundTransliterator
|
||||
static const UChar ID_SEP = 0x002D; /*-*/
|
||||
static const UChar ID_DELIM = 0x003B; /*;*/
|
||||
static const UChar OPEN_PAREN = 40;
|
||||
static const UChar CLOSE_PAREN = 41;
|
||||
|
||||
|
@ -662,6 +662,8 @@ protected:
|
||||
UTransPosition& index,
|
||||
UBool incremental) const;
|
||||
|
||||
friend class CompoundTransliterator; // for filteredTransliterate
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
@ -1013,13 +1015,6 @@ protected:
|
||||
|
||||
private:
|
||||
static void initializeCache(void);
|
||||
|
||||
/* IDs take the form <source> ID_SEP <target>, where
|
||||
* <source> and <target> are (usually) script names.
|
||||
* Compound IDs take the form <ID> ( ID_DELIM <ID> )+.
|
||||
*/
|
||||
static const UChar ID_SEP; // ((UChar)0x002D) /*-*/
|
||||
static const UChar ID_DELIM; // ((UChar)0x003B) /*;*/
|
||||
};
|
||||
|
||||
inline int32_t Transliterator::getMaximumContextLength(void) const {
|
||||
|
Loading…
Reference in New Issue
Block a user