From b2d51635d3983c5a2c90761cde341dd4d43e95c9 Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Tue, 17 Jul 2001 23:55:42 +0000 Subject: [PATCH] ICU-1053 RBT no longer needs to pass a filter down the chain X-SVN-Rev: 5261 --- icu4c/source/i18n/cpdtrans.cpp | 5 +++- icu4c/source/i18n/rbt.cpp | 6 ++--- icu4c/source/i18n/rbt_rule.cpp | 39 +++++++--------------------- icu4c/source/i18n/rbt_rule.h | 29 +++------------------ icu4c/source/i18n/rbt_set.cpp | 18 +++---------- icu4c/source/i18n/rbt_set.h | 9 ++----- icu4c/source/i18n/translit.cpp | 6 ++--- icu4c/source/i18n/unicode/translit.h | 9 ++----- 8 files changed, 30 insertions(+), 91 deletions(-) diff --git a/icu4c/source/i18n/cpdtrans.cpp b/icu4c/source/i18n/cpdtrans.cpp index 4fc25b60d9..61e654d2d9 100644 --- a/icu4c/source/i18n/cpdtrans.cpp +++ b/icu4c/source/i18n/cpdtrans.cpp @@ -12,7 +12,10 @@ #include "unicode/unifltlg.h" #include "uvector.h" -static const UChar NEWLINE = 10; +// keep in sync with Transliterator +static const UChar ID_SEP = 0x002D; /*-*/ +static const UChar ID_DELIM = 0x003B; /*;*/ +static const UChar NEWLINE = 10; /** * Constructs a new compound transliterator given an array of diff --git a/icu4c/source/i18n/rbt.cpp b/icu4c/source/i18n/rbt.cpp index 97eef0373d..0882b5db37 100644 --- a/icu4c/source/i18n/rbt.cpp +++ b/icu4c/source/i18n/rbt.cpp @@ -125,10 +125,8 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& while (index.start < index.limit && loopCount <= loopLimit) { TransliterationRule* r = isIncremental ? 
- data->ruleSet.findIncrementalMatch(text, index, *data, isPartial, - getFilter()) : - data->ruleSet.findMatch(text, index, *data, - getFilter()); + data->ruleSet.findIncrementalMatch(text, index, *data, isPartial) : + data->ruleSet.findMatch(text, index, *data); /* If we match a rule then apply it by replacing the key * with the rule output and repositioning the cursor diff --git a/icu4c/source/i18n/rbt_rule.cpp b/icu4c/source/i18n/rbt_rule.cpp index c07d83bbf2..e3d9618abf 100644 --- a/icu4c/source/i18n/rbt_rule.cpp +++ b/icu4c/source/i18n/rbt_rule.cpp @@ -355,15 +355,10 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const { * @param cursor position at which to translate next, representing offset * into text. This value must be between start and * limit. - * @param filter the filter. Any character for which - * filter.contains() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. */ UBool TransliterationRule::matches(const Replaceable& text, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const { + const TransliterationRuleData& data) const { // Match anteContext, key, and postContext int32_t cursor = pos.start - anteContextLength; // Quick length check; this is a performance win for long rules. @@ -374,7 +369,7 @@ UBool TransliterationRule::matches(const Replaceable& text, } for (int32_t i=0; istart and * limit. - * @param filter the filter. Any character for which - * filter.contains() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. * @return one of MISMATCH, PARTIAL_MATCH, or * FULL_MATCH. 
* @see #MISMATCH @@ -408,9 +399,8 @@ UBool TransliterationRule::matches(const Replaceable& text, */ int32_t TransliterationRule::getMatchDegree(const Replaceable& text, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const { - int len = getRegionMatchLength(text, pos, data, filter); + const TransliterationRuleData& data) const { + int len = getRegionMatchLength(text, pos, data); return len < anteContextLength ? MISMATCH : (len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH); } @@ -429,18 +419,13 @@ int32_t TransliterationRule::getMatchDegree(const Replaceable& text, * limit. * @param data a dictionary of variables mapping Character * to UnicodeSet - * @param filter the filter. Any character for which - * filter.contains() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. * @return -1 if there is a mismatch, 0 if the text is not long enough to * match any characters, otherwise the number of characters of text that * match this rule. */ int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const { + const TransliterationRuleData& data) const { int32_t cursor = pos.start - anteContextLength; // Quick length check; this is a performance win for long rules. // Widen by one to allow anchor matching. @@ -450,7 +435,7 @@ int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text, int32_t i; for (i=0; iCharacter * to UnicodeSet - * @param filter the filter. Any character for which - * filter.contains() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. 
*/ UBool TransliterationRule::charMatches(UChar keyChar, const Replaceable& text, int32_t index, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const { + const TransliterationRuleData& data) const { const UnicodeSet* set = 0; UChar textChar = (index >= pos.contextStart && index < pos.contextLimit) ? text.charAt(index) : ETHER; - return (filter == 0 || filter->contains(textChar)) && - (((set = data.lookupSet(keyChar)) == 0) ? - keyChar == textChar : set->contains(textChar)); + return ((set = data.lookupSet(keyChar)) == 0) ? + keyChar == textChar : set->contains(textChar); } /** diff --git a/icu4c/source/i18n/rbt_rule.h b/icu4c/source/i18n/rbt_rule.h index 517825f544..e6dd4cdcee 100644 --- a/icu4c/source/i18n/rbt_rule.h +++ b/icu4c/source/i18n/rbt_rule.h @@ -13,7 +13,6 @@ class Replaceable; class TransliterationRuleData; -class UnicodeFilter; /** * A transliteration rule used by @@ -274,15 +273,10 @@ public: * @param cursor position at which to translate next, representing offset * into text. This value must be between start and * limit. - * @param filter the filter. Any character for which - * filter.isIn() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. */ virtual UBool matches(const Replaceable& text, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const; + const TransliterationRuleData& data) const; /** * Return the degree of match between this rule and the given text. The @@ -299,10 +293,6 @@ public: * @param cursor position at which to translate next, representing offset * into text. This value must be between start and * limit. - * @param filter the filter. Any character for which - * filter.isIn() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. * @return one of MISMATCH, PARTIAL_MATCH, or * FULL_MATCH. 
* @see #MISMATCH @@ -311,8 +301,7 @@ public: */ virtual int32_t getMatchDegree(const Replaceable& text, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const; + const TransliterationRuleData& data) const; /** * Return the number of characters of the text that match this rule. If @@ -328,18 +317,13 @@ public: * limit. * @param data a dictionary of variables mapping Character * to UnicodeSet - * @param filter the filter. Any character for which - * filter.isIn() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. * @return -1 if there is a mismatch, 0 if the text is not long enough to * match any characters, otherwise the number of characters of text that * match this rule. */ virtual int32_t getRegionMatchLength(const Replaceable& text, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const; + const TransliterationRuleData& data) const; /** * Return true if the given key matches the given text. This method @@ -350,16 +334,11 @@ public: * @param textChar a character in the text being transliterated * @param data a dictionary of variables mapping Character * to UnicodeSet - * @param filter the filter. Any character for which - * filter.isIn() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. */ virtual UBool charMatches(UChar keyChar, const Replaceable& textChar, int32_t index, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const; + const TransliterationRuleData& data) const; /** * Create a rule string that represents this rule object. 
Append diff --git a/icu4c/source/i18n/rbt_set.cpp b/icu4c/source/i18n/rbt_set.cpp index e879cc7a01..ecf3663e18 100644 --- a/icu4c/source/i18n/rbt_set.cpp +++ b/icu4c/source/i18n/rbt_set.cpp @@ -203,23 +203,18 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data, * limit. * @param data a dictionary mapping variables to the sets they * represent (maps Character to UnicodeSet) - * @param filter the filter. Any character for which - * filter.contains() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. * @return the matching rule, or null if none found. */ TransliterationRule* TransliterationRuleSet::findMatch(const Replaceable& text, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const { + const TransliterationRuleData& data) const { /* We only need to check our indexed bin of the rule table, * based on the low byte of the first key character. */ int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF); for (int32_t i=index[x]; imatches(text, pos, data, filter)) { + if (rules[i]->matches(text, pos, data)) { return rules[i]; } } @@ -246,10 +241,6 @@ TransliterationRuleSet::findMatch(const Replaceable& text, * represent (maps Character to UnicodeSet) * @param partial output parameter. partial[0] is set to * true if a partial match is returned. - * @param filter the filter. Any character for which - * filter.contains() returns false will not be - * altered by this transliterator. If filter is - * null then no filtering is applied. * @return the matching rule, or null if none found, or if the text buffer * does not have enough text yet to unambiguously match a rule. 
*/ @@ -257,8 +248,7 @@ TransliterationRule* TransliterationRuleSet::findIncrementalMatch(const Replaceable& text, const UTransPosition& pos, const TransliterationRuleData& data, - UBool& isPartial, - const UnicodeFilter* filter) const { + UBool& isPartial) const { /* We only need to check our indexed bin of the rule table, * based on the low byte of the first key character. @@ -266,7 +256,7 @@ TransliterationRuleSet::findIncrementalMatch(const Replaceable& text, isPartial = FALSE; int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF); for (int32_t i=index[x]; igetMatchDegree(text, pos, data, filter); + int32_t match = rules[i]->getMatchDegree(text, pos, data); switch (match) { case TransliterationRule::FULL_MATCH: return rules[i]; diff --git a/icu4c/source/i18n/rbt_set.h b/icu4c/source/i18n/rbt_set.h index 48054f830a..d473a6768e 100644 --- a/icu4c/source/i18n/rbt_set.h +++ b/icu4c/source/i18n/rbt_set.h @@ -113,16 +113,12 @@ public: * limit. * @param data a dictionary mapping variables to the sets they * represent (maps Character to UnicodeSet) - * @param filter the filter. Any character for which - * filter.isIn() returns false will not be - * altered by this transliterator. If filter is * null then no filtering is applied. * @return the matching rule, or null if none found. */ virtual TransliterationRule* findMatch(const Replaceable& text, const UTransPosition& pos, - const TransliterationRuleData& data, - const UnicodeFilter* filter) const; + const TransliterationRuleData& data) const; /** * Attempt to find a matching rule at the specified point in the text. @@ -154,8 +150,7 @@ public: virtual TransliterationRule* findIncrementalMatch(const Replaceable& text, const UTransPosition& pos, const TransliterationRuleData& data, - UBool& isPartial, - const UnicodeFilter* filter) const; + UBool& isPartial) const; /** * Create rule strings that represents this rule set. 
diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp index eb3da2afe9..dbb5d14759 100644 --- a/icu4c/source/i18n/translit.cpp +++ b/icu4c/source/i18n/translit.cpp @@ -36,9 +36,9 @@ #include "unicode/uniset.h" #include "unicode/unitohex.h" -const UChar Transliterator::ID_SEP = 0x002D; /*-*/ -const UChar Transliterator::ID_DELIM = 0x003B; /*;*/ - +// keep in sync with CompoundTransliterator +static const UChar ID_SEP = 0x002D; /*-*/ +static const UChar ID_DELIM = 0x003B; /*;*/ static const UChar OPEN_PAREN = 40; static const UChar CLOSE_PAREN = 41; diff --git a/icu4c/source/i18n/unicode/translit.h b/icu4c/source/i18n/unicode/translit.h index 3df4629d09..d6dacedc06 100644 --- a/icu4c/source/i18n/unicode/translit.h +++ b/icu4c/source/i18n/unicode/translit.h @@ -662,6 +662,8 @@ protected: UTransPosition& index, UBool incremental) const; + friend class CompoundTransliterator; // for filteredTransliterate + public: /** @@ -1013,13 +1015,6 @@ protected: private: static void initializeCache(void); - - /* IDs take the form <source> ID_SEP <target>, where - * <source> and <target> are (usually) script names. - * Compound IDs take the form <ID> ( ID_DELIM <ID> )+. - */ - static const UChar ID_SEP; // ((UChar)0x002D) /*-*/ - static const UChar ID_DELIM; // ((UChar)0x003B) /*;*/ }; inline int32_t Transliterator::getMaximumContextLength(void) const {