ICU-1053 RBT no longer needs to pass a filter down the chain

X-SVN-Rev: 5261
2001-07-17 23:55:42 +00:00 · 2001-07-17 23:55:42 +00:00 · b2d51635d3
commit b2d51635d3
parent 5ded1a9c58
8 changed files with 30 additions and 91 deletions
--- a/icu4c/source/i18n/cpdtrans.cpp
+++ b/icu4c/source/i18n/cpdtrans.cpp
@@ -12,7 +12,10 @@
 #include "unicode/unifltlg.h"
 #include "uvector.h"

-static const UChar NEWLINE = 10;
+// keep in sync with Transliterator
+static const UChar ID_SEP   = 0x002D; /*-*/
+static const UChar ID_DELIM = 0x003B; /*;*/
+static const UChar NEWLINE  = 10;

 /**
 * Constructs a new compound transliterator given an array of
--- a/icu4c/source/i18n/rbt.cpp
+++ b/icu4c/source/i18n/rbt.cpp
@@ -125,10 +125,8 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&

    while (index.start < index.limit && loopCount <= loopLimit) {
        TransliterationRule* r = isIncremental ?
-            data->ruleSet.findIncrementalMatch(text, index, *data, isPartial,
-                                               getFilter()) :
-            data->ruleSet.findMatch(text, index, *data,
-                                    getFilter());
+            data->ruleSet.findIncrementalMatch(text, index, *data, isPartial) :
+            data->ruleSet.findMatch(text, index, *data);

        /* If we match a rule then apply it by replacing the key
         * with the rule output and repositioning the cursor
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@@ -355,15 +355,10 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
 * @param cursor position at which to translate next, representing offset
 * into text.  This value must be between <code>start</code> and
 * <code>limit</code>.
- * @param filter the filter.  Any character for which
- * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
- * altered by this transliterator.  If <tt>filter</tt> is
- * <tt>null</tt> then no filtering is applied.
 */
 UBool TransliterationRule::matches(const Replaceable& text,
                                   const UTransPosition& pos,
-                                   const TransliterationRuleData& data,
-                                   const UnicodeFilter* filter) const {
+                                   const TransliterationRuleData& data) const {
    // Match anteContext, key, and postContext
    int32_t cursor = pos.start - anteContextLength;
    // Quick length check; this is a performance win for long rules.
@@ -374,7 +369,7 @@ UBool TransliterationRule::matches(const Replaceable& text,
    }
    for (int32_t i=0; i<pattern.length(); ++i, ++cursor) {
        if (!charMatches(pattern.charAt(i), text, cursor, pos,
-                         data, filter)) {
+                         data)) {
            return FALSE;
        }
    }
@@ -396,10 +391,6 @@ UBool TransliterationRule::matches(const Replaceable& text,
 * @param cursor position at which to translate next, representing offset
 * into text.  This value must be between <code>start</code> and
 * <code>limit</code>.
- * @param filter the filter.  Any character for which
- * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
- * altered by this transliterator.  If <tt>filter</tt> is
- * <tt>null</tt> then no filtering is applied.
 * @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
 * <code>FULL_MATCH</code>.
 * @see #MISMATCH
@@ -408,9 +399,8 @@ UBool TransliterationRule::matches(const Replaceable& text,
 */
 int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
                                            const UTransPosition& pos,
-                                            const TransliterationRuleData& data,
-                                            const UnicodeFilter* filter) const {
-    int len = getRegionMatchLength(text, pos, data, filter);
+                                            const TransliterationRuleData& data) const {
+    int len = getRegionMatchLength(text, pos, data);
    return len < anteContextLength ? MISMATCH :
        (len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
 }
@@ -429,18 +419,13 @@ int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
 * <code>limit</code>.
 * @param data a dictionary of variables mapping <code>Character</code>
 * to <code>UnicodeSet</code>
- * @param filter the filter.  Any character for which
- * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
- * altered by this transliterator.  If <tt>filter</tt> is
- * <tt>null</tt> then no filtering is applied.
 * @return -1 if there is a mismatch, 0 if the text is not long enough to
 * match any characters, otherwise the number of characters of text that
 * match this rule.
 */
 int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
                                          const UTransPosition& pos,
-                                          const TransliterationRuleData& data,
-                                          const UnicodeFilter* filter) const {
+                                          const TransliterationRuleData& data) const {
    int32_t cursor = pos.start - anteContextLength;
    // Quick length check; this is a performance win for long rules.
    // Widen by one to allow anchor matching.
@@ -450,7 +435,7 @@ int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
    int32_t i;
    for (i=0; i<pattern.length() && cursor<pos.contextLimit; ++i, ++cursor) {
        if (!charMatches(pattern.charAt(i), text, cursor, pos,
-                         data, filter)) {
+                         data)) {
            return -1;
        }
    }
@@ -466,22 +451,16 @@ int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
 * @param textChar a character in the text being transliterated
 * @param data a dictionary of variables mapping <code>Character</code>
 * to <code>UnicodeSet</code>
- * @param filter the filter.  Any character for which
- * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
- * altered by this transliterator.  If <tt>filter</tt> is
- * <tt>null</tt> then no filtering is applied.
 */
 UBool TransliterationRule::charMatches(UChar keyChar, const Replaceable& text,
                                       int32_t index,
                                       const UTransPosition& pos,
-                                       const TransliterationRuleData& data,
-                                       const UnicodeFilter* filter) const {
+                                       const TransliterationRuleData& data) const {
    const UnicodeSet* set = 0;
    UChar textChar = (index >= pos.contextStart && index < pos.contextLimit)
            ? text.charAt(index) : ETHER;
-    return (filter == 0 || filter->contains(textChar)) &&
-        (((set = data.lookupSet(keyChar)) == 0) ?
-         keyChar == textChar : set->contains(textChar));
+    return ((set = data.lookupSet(keyChar)) == 0) ?
+            keyChar == textChar : set->contains(textChar);
 }

 /**
--- a/icu4c/source/i18n/rbt_rule.h
+++ b/icu4c/source/i18n/rbt_rule.h
@@ -13,7 +13,6 @@

 class Replaceable;
 class TransliterationRuleData;
-class UnicodeFilter;

 /**
 * A transliteration rule used by
@@ -274,15 +273,10 @@ public:
     * @param cursor position at which to translate next, representing offset
     * into text.  This value must be between <code>start</code> and
     * <code>limit</code>.
-     * @param filter the filter.  Any character for which
-     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
-     * altered by this transliterator.  If <tt>filter</tt> is
-     * <tt>null</tt> then no filtering is applied.
     */
    virtual UBool matches(const Replaceable& text,
                          const UTransPosition& pos,
-                          const TransliterationRuleData& data,
-                          const UnicodeFilter* filter) const;
+                          const TransliterationRuleData& data) const;

    /**
     * Return the degree of match between this rule and the given text.  The
@@ -299,10 +293,6 @@ public:
     * @param cursor position at which to translate next, representing offset
     * into text.  This value must be between <code>start</code> and
     * <code>limit</code>.
-     * @param filter the filter.  Any character for which
-     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
-     * altered by this transliterator.  If <tt>filter</tt> is
-     * <tt>null</tt> then no filtering is applied.
     * @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
     * <code>FULL_MATCH</code>.
     * @see #MISMATCH
@@ -311,8 +301,7 @@ public:
     */
    virtual int32_t getMatchDegree(const Replaceable& text,
                                   const UTransPosition& pos,
-                                   const TransliterationRuleData& data,
-                                   const UnicodeFilter* filter) const;
+                                   const TransliterationRuleData& data) const;

    /**
     * Return the number of characters of the text that match this rule.  If
@@ -328,18 +317,13 @@ public:
     * <code>limit</code>.
     * @param data a dictionary of variables mapping <code>Character</code>
     * to <code>UnicodeSet</code>
-     * @param filter the filter.  Any character for which
-     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
-     * altered by this transliterator.  If <tt>filter</tt> is
-     * <tt>null</tt> then no filtering is applied.
     * @return -1 if there is a mismatch, 0 if the text is not long enough to
     * match any characters, otherwise the number of characters of text that
     * match this rule.
     */
    virtual int32_t getRegionMatchLength(const Replaceable& text,
                                         const UTransPosition& pos,
-                                         const TransliterationRuleData& data,
-                                         const UnicodeFilter* filter) const;
+                                         const TransliterationRuleData& data) const;

    /**
     * Return true if the given key matches the given text.  This method
@@ -350,16 +334,11 @@ public:
     * @param textChar a character in the text being transliterated
     * @param data a dictionary of variables mapping <code>Character</code>
     * to <code>UnicodeSet</code>
-     * @param filter the filter.  Any character for which
-     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
-     * altered by this transliterator.  If <tt>filter</tt> is
-     * <tt>null</tt> then no filtering is applied.
     */
    virtual UBool charMatches(UChar keyChar, const Replaceable& textChar,
                              int32_t index,
                              const UTransPosition& pos,
-                              const TransliterationRuleData& data,
-                              const UnicodeFilter* filter) const;
+                              const TransliterationRuleData& data) const;

    /**
     * Create a rule string that represents this rule object.  Append
--- a/icu4c/source/i18n/rbt_set.cpp
+++ b/icu4c/source/i18n/rbt_set.cpp
@@ -203,23 +203,18 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
 * <code>limit</code>.
 * @param data a dictionary mapping variables to the sets they
 * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
- * @param filter the filter.  Any character for which
- * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
- * altered by this transliterator.  If <tt>filter</tt> is
- * <tt>null</tt> then no filtering is applied.
 * @return the matching rule, or null if none found.
 */
 TransliterationRule*
 TransliterationRuleSet::findMatch(const Replaceable& text,
                                  const UTransPosition& pos,
-                                  const TransliterationRuleData& data,
-                                  const UnicodeFilter* filter) const {
+                                  const TransliterationRuleData& data) const {
    /* We only need to check our indexed bin of the rule table,
     * based on the low byte of the first key character.
     */
    int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF);
    for (int32_t i=index[x]; i<index[x+1]; ++i) {
-        if (rules[i]->matches(text, pos, data, filter)) {
+        if (rules[i]->matches(text, pos, data)) {
            return rules[i];
        }
    }
@@ -246,10 +241,6 @@ TransliterationRuleSet::findMatch(const Replaceable& text,
 * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
 * @param partial output parameter.  <code>partial[0]</code> is set to
 * true if a partial match is returned.
- * @param filter the filter.  Any character for which
- * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
- * altered by this transliterator.  If <tt>filter</tt> is
- * <tt>null</tt> then no filtering is applied.
 * @return the matching rule, or null if none found, or if the text buffer
 * does not have enough text yet to unambiguously match a rule.
 */
@@ -257,8 +248,7 @@ TransliterationRule*
 TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
                                             const UTransPosition& pos,
                                             const TransliterationRuleData& data,
-                                             UBool& isPartial,
-                                             const UnicodeFilter* filter) const {
+                                             UBool& isPartial) const {

    /* We only need to check our indexed bin of the rule table,
     * based on the low byte of the first key character.
@@ -266,7 +256,7 @@ TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
    isPartial = FALSE;
    int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF);
    for (int32_t i=index[x]; i<index[x+1]; ++i) {
-        int32_t match = rules[i]->getMatchDegree(text, pos, data, filter);
+        int32_t match = rules[i]->getMatchDegree(text, pos, data);
        switch (match) {
        case TransliterationRule::FULL_MATCH:
            return rules[i];
--- a/icu4c/source/i18n/rbt_set.h
+++ b/icu4c/source/i18n/rbt_set.h
@@ -113,16 +113,11 @@ public:
     * <code>limit</code>.
     * @param data a dictionary mapping variables to the sets they
     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
-     * @param filter the filter.  Any character for which
-     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
-     * altered by this transliterator.  If <tt>filter</tt> is
-     * <tt>null</tt> then no filtering is applied.
     * @return the matching rule, or null if none found.
     */
    virtual TransliterationRule* findMatch(const Replaceable& text,
                                           const UTransPosition& pos,
-                                           const TransliterationRuleData& data,
-                                           const UnicodeFilter* filter) const;
+                                           const TransliterationRuleData& data) const;

    /**
     * Attempt to find a matching rule at the specified point in the text.
@@ -154,8 +149,7 @@ public:
    virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,
                                              const UTransPosition& pos,
                                              const TransliterationRuleData& data,
-                                              UBool& isPartial,
-                                              const UnicodeFilter* filter) const;
+                                              UBool& isPartial) const;

    /**
     * Create rule strings that represents this rule set.
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@@ -36,9 +36,9 @@
 #include "unicode/uniset.h"
 #include "unicode/unitohex.h"

-const UChar Transliterator::ID_SEP   = 0x002D; /*-*/
-const UChar Transliterator::ID_DELIM = 0x003B; /*;*/
-
+// keep in sync with CompoundTransliterator
+static const UChar ID_SEP      = 0x002D; /*-*/
+static const UChar ID_DELIM    = 0x003B; /*;*/
 static const UChar OPEN_PAREN  = 40;
 static const UChar CLOSE_PAREN = 41;

--- a/icu4c/source/i18n/unicode/translit.h
+++ b/icu4c/source/i18n/unicode/translit.h
@@ -662,6 +662,8 @@ protected:
                                       UTransPosition& index,
                                       UBool incremental) const;

+    friend class CompoundTransliterator; // for filteredTransliterate
+
 public:

    /**
@@ -1013,13 +1015,6 @@ protected:

 private:
    static void initializeCache(void);
-
-    /* IDs take the form <source> ID_SEP <target>, where
-     * <source> and <target> are (usually) script names.
-     * Compound IDs take the form <ID> ( ID_DELIM <ID> )+.
-     */
-    static const UChar ID_SEP;   // ((UChar)0x002D) /*-*/
-    static const UChar ID_DELIM; // ((UChar)0x003B) /*;*/
 };

 inline int32_t Transliterator::getMaximumContextLength(void) const {