ICU-1053 RBT no longer needs to pass a filter down the chain

X-SVN-Rev: 5261
This commit is contained in:
Alan Liu 2001-07-17 23:55:42 +00:00
parent 5ded1a9c58
commit b2d51635d3
8 changed files with 30 additions and 91 deletions

View File

@ -12,7 +12,10 @@
#include "unicode/unifltlg.h"
#include "uvector.h"
static const UChar NEWLINE = 10;
// keep in sync with Transliterator
static const UChar ID_SEP = 0x002D; /*-*/
static const UChar ID_DELIM = 0x003B; /*;*/
static const UChar NEWLINE = 10;
/**
* Constructs a new compound transliterator given an array of

View File

@ -125,10 +125,8 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
while (index.start < index.limit && loopCount <= loopLimit) {
TransliterationRule* r = isIncremental ?
data->ruleSet.findIncrementalMatch(text, index, *data, isPartial,
getFilter()) :
data->ruleSet.findMatch(text, index, *data,
getFilter());
data->ruleSet.findIncrementalMatch(text, index, *data, isPartial) :
data->ruleSet.findMatch(text, index, *data);
/* If we match a rule then apply it by replacing the key
* with the rule output and repositioning the cursor

View File

@ -355,15 +355,10 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
* @param cursor position at which to translate next, representing offset
* into text. This value must be between <code>start</code> and
* <code>limit</code>.
* @param filter the filter. Any character for which
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
UBool TransliterationRule::matches(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
const TransliterationRuleData& data) const {
// Match anteContext, key, and postContext
int32_t cursor = pos.start - anteContextLength;
// Quick length check; this is a performance win for long rules.
@ -374,7 +369,7 @@ UBool TransliterationRule::matches(const Replaceable& text,
}
for (int32_t i=0; i<pattern.length(); ++i, ++cursor) {
if (!charMatches(pattern.charAt(i), text, cursor, pos,
data, filter)) {
data)) {
return FALSE;
}
}
@ -396,10 +391,6 @@ UBool TransliterationRule::matches(const Replaceable& text,
* @param cursor position at which to translate next, representing offset
* into text. This value must be between <code>start</code> and
* <code>limit</code>.
* @param filter the filter. Any character for which
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
* @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
* <code>FULL_MATCH</code>.
* @see #MISMATCH
@ -408,9 +399,8 @@ UBool TransliterationRule::matches(const Replaceable& text,
*/
int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
int len = getRegionMatchLength(text, pos, data, filter);
const TransliterationRuleData& data) const {
int len = getRegionMatchLength(text, pos, data);
return len < anteContextLength ? MISMATCH :
(len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
}
@ -429,18 +419,13 @@ int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
* <code>limit</code>.
* @param data a dictionary of variables mapping <code>Character</code>
* to <code>UnicodeSet</code>
* @param filter the filter. Any character for which
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
* @return -1 if there is a mismatch, 0 if the text is not long enough to
* match any characters, otherwise the number of characters of text that
* match this rule.
*/
int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
const TransliterationRuleData& data) const {
int32_t cursor = pos.start - anteContextLength;
// Quick length check; this is a performance win for long rules.
// Widen by one to allow anchor matching.
@ -450,7 +435,7 @@ int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
int32_t i;
for (i=0; i<pattern.length() && cursor<pos.contextLimit; ++i, ++cursor) {
if (!charMatches(pattern.charAt(i), text, cursor, pos,
data, filter)) {
data)) {
return -1;
}
}
@ -466,22 +451,16 @@ int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
* @param textChar a character in the text being transliterated
* @param data a dictionary of variables mapping <code>Character</code>
* to <code>UnicodeSet</code>
* @param filter the filter. Any character for which
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
UBool TransliterationRule::charMatches(UChar keyChar, const Replaceable& text,
int32_t index,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
const TransliterationRuleData& data) const {
const UnicodeSet* set = 0;
UChar textChar = (index >= pos.contextStart && index < pos.contextLimit)
? text.charAt(index) : ETHER;
return (filter == 0 || filter->contains(textChar)) &&
(((set = data.lookupSet(keyChar)) == 0) ?
keyChar == textChar : set->contains(textChar));
return ((set = data.lookupSet(keyChar)) == 0) ?
keyChar == textChar : set->contains(textChar);
}
/**

View File

@ -13,7 +13,6 @@
class Replaceable;
class TransliterationRuleData;
class UnicodeFilter;
/**
* A transliteration rule used by
@ -274,15 +273,10 @@ public:
* @param cursor position at which to translate next, representing offset
* into text. This value must be between <code>start</code> and
* <code>limit</code>.
* @param filter the filter. Any character for which
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
virtual UBool matches(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;
const TransliterationRuleData& data) const;
/**
* Return the degree of match between this rule and the given text. The
@ -299,10 +293,6 @@ public:
* @param cursor position at which to translate next, representing offset
* into text. This value must be between <code>start</code> and
* <code>limit</code>.
* @param filter the filter. Any character for which
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
* @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
* <code>FULL_MATCH</code>.
* @see #MISMATCH
@ -311,8 +301,7 @@ public:
*/
virtual int32_t getMatchDegree(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;
const TransliterationRuleData& data) const;
/**
* Return the number of characters of the text that match this rule. If
@ -328,18 +317,13 @@ public:
* <code>limit</code>.
* @param data a dictionary of variables mapping <code>Character</code>
* to <code>UnicodeSet</code>
* @param filter the filter. Any character for which
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
* @return -1 if there is a mismatch, 0 if the text is not long enough to
* match any characters, otherwise the number of characters of text that
* match this rule.
*/
virtual int32_t getRegionMatchLength(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;
const TransliterationRuleData& data) const;
/**
* Return true if the given key matches the given text. This method
@ -350,16 +334,11 @@ public:
* @param textChar a character in the text being transliterated
* @param data a dictionary of variables mapping <code>Character</code>
* to <code>UnicodeSet</code>
* @param filter the filter. Any character for which
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
virtual UBool charMatches(UChar keyChar, const Replaceable& textChar,
int32_t index,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;
const TransliterationRuleData& data) const;
/**
* Create a rule string that represents this rule object. Append

View File

@ -203,23 +203,18 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
* <code>limit</code>.
* @param data a dictionary mapping variables to the sets they
* represent (maps <code>Character</code> to <code>UnicodeSet</code>)
* @param filter the filter. Any character for which
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
* @return the matching rule, or null if none found.
*/
TransliterationRule*
TransliterationRuleSet::findMatch(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const {
const TransliterationRuleData& data) const {
/* We only need to check our indexed bin of the rule table,
* based on the low byte of the first key character.
*/
int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF);
for (int32_t i=index[x]; i<index[x+1]; ++i) {
if (rules[i]->matches(text, pos, data, filter)) {
if (rules[i]->matches(text, pos, data)) {
return rules[i];
}
}
@ -246,10 +241,6 @@ TransliterationRuleSet::findMatch(const Replaceable& text,
* represent (maps <code>Character</code> to <code>UnicodeSet</code>)
* @param isPartial output parameter. Reset to FALSE on entry and set to
* TRUE if a partial match is returned.
* @param filter the filter. Any character for which
* <tt>filter.contains()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
* @return the matching rule, or null if none found, or if the text buffer
* does not have enough text yet to unambiguously match a rule.
*/
@ -257,8 +248,7 @@ TransliterationRule*
TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
UBool& isPartial,
const UnicodeFilter* filter) const {
UBool& isPartial) const {
/* We only need to check our indexed bin of the rule table,
* based on the low byte of the first key character.
@ -266,7 +256,7 @@ TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
isPartial = FALSE;
int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF);
for (int32_t i=index[x]; i<index[x+1]; ++i) {
int32_t match = rules[i]->getMatchDegree(text, pos, data, filter);
int32_t match = rules[i]->getMatchDegree(text, pos, data);
switch (match) {
case TransliterationRule::FULL_MATCH:
return rules[i];

View File

@ -113,16 +113,12 @@ public:
* <code>limit</code>.
* @param data a dictionary mapping variables to the sets they
* represent (maps <code>Character</code> to <code>UnicodeSet</code>)
* @param filter the filter. Any character for which
* <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
* @return the matching rule, or null if none found.
*/
virtual TransliterationRule* findMatch(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
const UnicodeFilter* filter) const;
const TransliterationRuleData& data) const;
/**
* Attempt to find a matching rule at the specified point in the text.
@ -154,8 +150,7 @@ public:
virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,
const UTransPosition& pos,
const TransliterationRuleData& data,
UBool& isPartial,
const UnicodeFilter* filter) const;
UBool& isPartial) const;
/**
* Create rule strings that represents this rule set.

View File

@ -36,9 +36,9 @@
#include "unicode/uniset.h"
#include "unicode/unitohex.h"
const UChar Transliterator::ID_SEP = 0x002D; /*-*/
const UChar Transliterator::ID_DELIM = 0x003B; /*;*/
// keep in sync with CompoundTransliterator
static const UChar ID_SEP = 0x002D; /*-*/
static const UChar ID_DELIM = 0x003B; /*;*/
static const UChar OPEN_PAREN = 40;
static const UChar CLOSE_PAREN = 41;

View File

@ -662,6 +662,8 @@ protected:
UTransPosition& index,
UBool incremental) const;
friend class CompoundTransliterator; // for filteredTransliterate
public:
/**
@ -1013,13 +1015,6 @@ protected:
private:
static void initializeCache(void);
/* IDs take the form <source> ID_SEP <target>, where
* <source> and <target> are (usually) script names.
* Compound IDs take the form <ID> ( ID_DELIM <ID> )+.
*/
static const UChar ID_SEP; // ((UChar)0x002D) /*-*/
static const UChar ID_DELIM; // ((UChar)0x003B) /*;*/
};
inline int32_t Transliterator::getMaximumContextLength(void) const {