ICU-264 improve filter handling of CompoundTransliterator

X-SVN-Rev: 722
2000-02-05 00:24:24 +00:00 · 2000-02-05 00:24:24 +00:00 · 7b6b7df37a
commit 7b6b7df37a
parent 7f7b2d90f3
6 changed files with 174 additions and 121 deletions
--- a/icu4c/source/i18n/cpdtrans.cpp
+++ b/icu4c/source/i18n/cpdtrans.cpp
@ -28,7 +28,7 @@ CompoundTransliterator::CompoundTransliterator(
                           int32_t count,
                           UnicodeFilter* adoptedFilter) :
    Transliterator(joinIDs(transliterators, count), adoptedFilter),
-    trans(0), count(0) {
+    trans(0), count(0), filters(0) {
    setTransliterators(transliterators, count);
 }

@ -42,9 +42,8 @@ CompoundTransliterator::CompoundTransliterator(
 CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
                              Transliterator::Direction direction,
                              UnicodeFilter* adoptedFilter) :
-    Transliterator(ID, adoptedFilter) {
-    // changed MED
-    // Later, add "rule1[filter];rule2...
+    Transliterator(ID, 0), // set filter to 0 here!
+    filters(0) {
    UnicodeString* list = split(ID, ';', count);
    trans = new Transliterator*[count];
    for (int32_t i = 0; i < count; ++i) {
@ -53,6 +52,7 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
    }
    delete[] list;
    computeMaximumContextLength();
+    adoptFilter(adoptedFilter);
 }

 /**
@ -105,7 +105,7 @@ UnicodeString* CompoundTransliterator::split(const UnicodeString& s,
 * Copy constructor.
 */
 CompoundTransliterator::CompoundTransliterator(const CompoundTransliterator& t) :
-    Transliterator(t), trans(0), count(0) {
+    Transliterator(t), trans(0), count(0), filters(0) { // start with null arrays and a zero count
    *this = t; // operator= then clones t's components (and its saved filters, if any)
 }

@ -119,9 +119,14 @@ CompoundTransliterator::~CompoundTransliterator() {
 void CompoundTransliterator::freeTransliterators(void) {
    for (int32_t i=0; i<count; ++i) {
        delete trans[i]; // each component transliterator is owned by this object
+        if (filters != 0) { // filters[] may never have been allocated
+            delete filters[i]; // saved original filter for component i (may be null)
+        }
    }
-    delete[] trans;    
+    delete[] trans; // release the pointer arrays themselves
+    delete[] filters;
    trans = 0;
+    filters = 0;
    count = 0; // the compound is now empty
 }

@ -135,14 +140,23 @@ CompoundTransliterator& CompoundTransliterator::operator=(
    for (i=0; i<count; ++i) {
        delete trans[i];
        trans[i] = 0;
+        if (filters != 0) {
+            delete filters[i];
+            filters[i] = 0;
+        }
    }
    if (t.count > count) {
        delete[] trans;
        trans = new Transliterator*[t.count];
+        delete[] filters;
+        filters = (t.filter == 0) ? 0 : new UnicodeFilter*[t.count];
    }
    count = t.count;
    for (i=0; i<count; ++i) {
        trans[i] = t.trans[i]->clone();
+        if (t.filters != 0) {
+            filters[i] = t.filters[i]->clone();
+        }
    }
    return *this;
 }
@ -171,7 +185,6 @@ const Transliterator& CompoundTransliterator::getTransliterator(int32_t index) c
    return *trans[index];
 }

-
 void CompoundTransliterator::setTransliterators(Transliterator* const transliterators[],
                                                int32_t transCount) {
    Transliterator** a = new Transliterator*[transCount];
@ -183,10 +196,64 @@ void CompoundTransliterator::setTransliterators(Transliterator* const transliter

 void CompoundTransliterator::adoptTransliterators(Transliterator* adoptedTransliterators[],
                                                   int32_t transCount) {
+    // First free trans[] and set count to zero.  Once this is done,
+    // orphan the filter.  Set up the new trans[], and call
+    // adoptFilter() to fix up the filters in trans[].
    freeTransliterators();
+    UnicodeFilter *f = orphanFilter(); // count is 0 here, so the overridden adoptFilter(0) skips the per-component work
    trans = adoptedTransliterators; // the caller's array is now owned by this object
    count = transCount;
    computeMaximumContextLength();
+    adoptFilter(f); // re-install the compound filter so it is ANDed into every new component
+}
+
+/**
+ * Override Transliterator.  Adopts f (may be 0) as the filter of the
+ * compound as a whole and gives each component transliterator the
+ * logical AND of f and the component's own filter.  The components'
+ * original filters are kept in the filters array while f is non-null.
+ */
+void CompoundTransliterator::adoptFilter(UnicodeFilter* f) {
+    /**
+     * If there is a filter F for the compound transliterator as a
+     * whole, then every component filter g in the chain is replaced
+     * by g' = F & g (createAnd() copies F when g is null).
+     *
+     * There are two possible states:
+     * 1. getFilter() != 0
+     *    original filters in filters[]
+     *    createAnd() filters in trans[]
+     * 2. getFilter() == 0
+     *    filters[] either unallocated or empty
+     *    original filters in trans[]
+     * This method must ensure that we stay in one of these states.
+     */
+    if (count > 0) { // an empty compound has no component filters to rewrite
+        if (f == 0) {
+            // Restore original filters
+            if (getFilter() != 0 && filters != 0) {
+                for (int32_t i=0; i<count; ++i) {
+                    trans[i]->adoptFilter(filters[i]);
+                    filters[i] = 0; // ownership moved back into trans[i]
+                }
+            }
+        } else {
+            // If the previous filter is 0, then the component filters
+            // are in trans[i], and need to be pulled out into filters[].
+            if (getFilter() == 0) {
+                if (filters == 0) {
+                    filters = new UnicodeFilter*[count];
+                }
+                for (int32_t i=0; i<count; ++i) {
+                    filters[i] = trans[i]->orphanFilter(); // NOTE(review): relies on orphanFilter() returning a still-live object -- confirm
+                }
+            }
+            for (int32_t i=0; i<count; ++i) {
+                trans[i]->adoptFilter(UnicodeFilterLogic::createAnd(f, filters[i])); // createAnd() clones its inputs, so f itself is left for the base class
+            }
+        }
+    }
+    Transliterator::adoptFilter(f); // record f as the compound's own filter
 }

 /**
@ -252,28 +319,7 @@ void CompoundTransliterator::handleTransliterate(Replaceable& text, Position& in
        return; // Short circuit for empty compound transliterators
    }

-    /**
-     * One more wrinkle.  If there is a filter F for the compound
-     * transliterator as a whole, then we need to modify every
-     * non-null filter f in the chain to be f' = F & f.  Then,
-     * when we're done, we restore the original filters.
-     *
-     * A possible future optimization is to change f to f' at
-     * construction time, but then if anyone else is using the
-     * transliterators in the chain outside of this context, they
-     * will get unexpected results.
-     */
-    const UnicodeFilter* F = getFilter();
 	int32_t i;
-    UnicodeFilter** f = 0;
-    if (F != 0) {
-        f = new UnicodeFilter*[count];
-        for (i=0; i<count; ++i) {
-            f[i] = trans[i]->getFilter()->clone();
-            trans[i]->adoptFilter(UnicodeFilterLogic::createAnd(*F, *f[i]));
-        }
-    }
-
    int32_t cursor = index.cursor;
    int32_t limit = index.limit;
    int32_t globalLimit = limit;
@ -297,14 +343,6 @@ void CompoundTransliterator::handleTransliterate(Replaceable& text, Position& in
    // transliterator left it.  Limit needs to be put back
    // where it was, modulo adjustments for deletions/insertions.
    index.limit = globalLimit;
-    
-    // Fixup the transliterator filters, if we had to modify them.
-    if (f != 0) {
-        for (i=0; i<count; ++i) {
-            trans[i]->adoptFilter(f[i]);
-        }
-        delete[] f;
-    }
 }

 /**
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -473,6 +473,19 @@ const UnicodeFilter* Transliterator::getFilter(void) const {
    return filter;
 }

+/**
+ * Detaches and returns the filter used by this transliterator, or
+ * <tt>NULL</tt> if it uses no filter.  The caller must eventually
+ * delete the result.  The detach is done by calling adoptFilter(0),
+ * after which this transliterator's filter is <tt>NULL</tt>.
+ */
+UnicodeFilter* Transliterator::orphanFilter(void) {
+    UnicodeFilter *result = filter; // remember the current filter for the caller
+    // MUST go through adoptFilter in case latter is overridden
+    adoptFilter(0); // NOTE(review): if adoptFilter() deletes the filter it replaces, result dangles here -- confirm
+    return result;
+}
+
 /**
 * Changes the filter used by this transliterator.  If the filter
 * is set to <tt>null</tt> then no filtering will occur.
--- a/icu4c/source/i18n/unicode/cpdtrans.h
+++ b/icu4c/source/i18n/unicode/cpdtrans.h
@ -32,12 +32,17 @@
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: cpdtrans.h,v $ $Revision: 1.5 $ $Date: 2000/01/19 19:02:10 $
+ * @version $RCSfile: cpdtrans.h,v $ $Revision: 1.6 $ $Date: 2000/02/05 00:23:56 $
 */
 class U_I18N_API CompoundTransliterator : public Transliterator {

    Transliterator** trans;

+    /**
+     * Array of original filters associated with transliterators.
+     */
+    UnicodeFilter** filters;
+
    int32_t count;

 public:
@ -101,6 +106,14 @@ public:
    void adoptTransliterators(Transliterator* adoptedTransliterators[],
                              int32_t count);

+    /**
+     * Override Transliterator.  Modify the transliterators that make up
+     * this compound transliterator so their filters are the logical AND
+     * of this transliterator's filter and their own.  Original filters
+     * are kept in the filters array.
+     */
+    virtual void adoptFilter(UnicodeFilter* f);
+
    /**
     * Implements {@link Transliterator#handleTransliterate}.
     */
--- a/icu4c/source/i18n/unicode/translit.h
+++ b/icu4c/source/i18n/unicode/translit.h
@ -671,11 +671,19 @@ public:
                                         UnicodeString& result);

    /**
-     * Returns the filter used by this transliterator, or <tt>null</tt>
+     * Returns the filter used by this transliterator, or <tt>NULL</tt>
     * if this transliterator uses no filter.
     */
    virtual const UnicodeFilter* getFilter(void) const;

+    /**
+     * Returns the filter used by this transliterator, or <tt>NULL</tt> if this
+     * transliterator uses no filter.  The caller must eventually delete the
+     * result.  After this call, this transliterator's filter is set to
+     * <tt>NULL</tt>.  Calls adoptFilter().
+     */
+    UnicodeFilter* orphanFilter(void);
+
    /**
     * Changes the filter used by this transliterator.  If the filter
     * is set to <tt>null</tt> then no filtering will occur.
--- a/icu4c/source/i18n/unicode/unifltlg.h
+++ b/icu4c/source/i18n/unicode/unifltlg.h
@ -20,6 +20,12 @@ class UnicodeFilter;
 * filter objects that perform logical inversion (<tt>not</tt>),
 * intersection (<tt>and</tt>), or union (<tt>or</tt>) of the given
 * filter objects.
+ *
+ * If a UnicodeFilter* f is passed in, where f == NULL, then that
+ * is treated as a filter that contains all Unicode characters.
+ * Therefore, createNot(NULL) returns a filter that contains no
+ * Unicode characters.  Likewise, createAnd(g, NULL) returns g->clone(),
+ * and createAnd(NULL, NULL) returns NULL.
 */
 class U_I18N_API UnicodeFilterLogic {

@ -28,50 +34,34 @@ public:
    /**
     * Returns a <tt>UnicodeFilter</tt> that implements the inverse of
     * the given filter.
+     * @param f may be NULL
+     * @result always non-NULL
     */
-    static UnicodeFilter* createNot(const UnicodeFilter& f);
+    static UnicodeFilter* createNot(const UnicodeFilter* f);

    /**
     * Returns a <tt>UnicodeFilter</tt> that implements a short
     * circuit AND of the result of the two given filters.  That is,
     * if <tt>f.contains()</tt> is <tt>false</tt>, then <tt>g.contains()</tt>
     * is not called, and <tt>contains()</tt> returns <tt>false</tt>.
-     *
-     * <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
+     * @param f may be NULL
+     * @param g may be NULL
+     * @result will be NULL if and only if f == g == NULL
     */
-    static UnicodeFilter* createAnd(const UnicodeFilter& f,
-                                    const UnicodeFilter& g);
-
-    /**
-     * Returns a <tt>UnicodeFilter</tt> that implements a short
-     * circuit AND of the result of the given filters.  That is, if
-     * <tt>f[i].contains()</tt> is <tt>false</tt>, then
-     * <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
-     * <tt>contains()</tt> returns <tt>false</tt>.
-     */
-    // static UnicodeFilter* and(const UnicodeFilter** f);
+    static UnicodeFilter* createAnd(const UnicodeFilter* f,
+                                    const UnicodeFilter* g);

    /**
     * Returns a <tt>UnicodeFilter</tt> that implements a short
     * circuit OR of the result of the two given filters.  That is, if
     * <tt>f.contains()</tt> is <tt>true</tt>, then <tt>g.contains()</tt> is
     * not called, and <tt>contains()</tt> returns <tt>true</tt>.
-     *
-     * <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
+     * @param f may be NULL
+     * @param g may be NULL
+     * @result will be NULL if and only if f == g == NULL
     */
-    static UnicodeFilter* createOr(const UnicodeFilter& f,
-                                   const UnicodeFilter& g);
-
-    /**
-     * Returns a <tt>UnicodeFilter</tt> that implements a short
-     * circuit OR of the result of the given filters.  That is, if
-     * <tt>f[i].contains()</tt> is <tt>false</tt>, then
-     * <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
-     * <tt>contains()</tt> returns <tt>true</tt>.
-     */
-    // static UnicodeFilter* or(const UnicodeFilter** f);
-
-    // TODO: Add nand() & nor() for convenience, if needed.
+    static UnicodeFilter* createOr(const UnicodeFilter* f,
+                                   const UnicodeFilter* g);

 private:
    // Disallow instantiation
--- a/icu4c/source/i18n/unifltlg.cpp
+++ b/icu4c/source/i18n/unifltlg.cpp
@ -10,6 +10,21 @@
 #include "unicode/unifltlg.h"
 #include "unicode/unifilt.h"

+/**
+ * A NullFilter always returns a fixed value, either TRUE or FALSE.
+ * A filter value of 0 (that is, a UnicodeFilter* f, where f == 0)
+ * is equivalent to a NullFilter(TRUE).
+ */
+class NullFilter : public UnicodeFilter {
+    bool_t result; // the constant answer returned by contains()
+public:
+    NullFilter(bool_t r) { result = r; }
+    NullFilter(const NullFilter& f) { result = f.result; }
+    virtual ~NullFilter() {}
+    virtual bool_t contains(UChar c) const { return result; } // c is ignored
+    virtual UnicodeFilter* clone() const { return new NullFilter(*this); } // caller owns the copy
+};
+
 class UnicodeNotFilter : public UnicodeFilter {
    UnicodeFilter* filt;
 public:
@ -30,8 +45,12 @@ UnicodeFilter* UnicodeNotFilter::clone() const { return new UnicodeNotFilter(*th
 * Returns a <tt>UnicodeFilter</tt> that implements the inverse of
 * the given filter.
 */
-UnicodeFilter* UnicodeFilterLogic::createNot(const UnicodeFilter& f) {
-    return new UnicodeNotFilter(f.clone());
+UnicodeFilter* UnicodeFilterLogic::createNot(const UnicodeFilter* f) {
+    if (f == 0) { // a null filter stands for "contains everything"
+        return new NullFilter(FALSE); // so its inverse contains nothing
+    } else {
+        return new UnicodeNotFilter(f->clone()); // f is cloned, never adopted
+    }
 }

 class UnicodeAndFilter : public UnicodeFilter {
@ -57,34 +76,21 @@ UnicodeFilter* UnicodeAndFilter::clone() const { return new UnicodeAndFilter(*th
 * circuit AND of the result of the two given filters.  That is,
 * if <tt>f.contains()</tt> is <tt>false</tt>, then <tt>g.contains()</tt>
 * is not called, and <tt>contains()</tt> returns <tt>false</tt>.
- *
- * <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
 */
-UnicodeFilter* UnicodeFilterLogic::createAnd(const UnicodeFilter& f,
-                                             const UnicodeFilter& g) {
-    return new UnicodeAndFilter(f.clone(), g.clone());
+UnicodeFilter* UnicodeFilterLogic::createAnd(const UnicodeFilter* f,
+                                             const UnicodeFilter* g) {
+    if (f == 0) { // a null f contains everything, so the result depends on g alone
+        if (g == 0) {
+            return NULL; // both unrestricted: the AND is unrestricted too
+        }
+        return g->clone(); // caller owns the copy
+    }
+    if (g == 0) {
+        return f->clone(); // caller owns the copy
+    }
+    return new UnicodeAndFilter(f->clone(), g->clone()); // both inputs are cloned, never adopted
 }

-/**
- * Returns a <tt>UnicodeFilter</tt> that implements a short
- * circuit AND of the result of the given filters.  That is, if
- * <tt>f[i].contains()</tt> is <tt>false</tt>, then
- * <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
- * <tt>contains()</tt> returns <tt>false</tt>.
- */
-//!UnicodeFilter* UnicodeFilterLogic::and(const UnicodeFilter** f) {
-//!    return new UnicodeFilter() {
-//!        public bool_t contains(UChar c) {
-//!            for (int32_t i=0; i<f.length; ++i) {
-//!                if (!f[i].contains(c)) {
-//!                    return FALSE;
-//!                }
-//!            }
-//!            return TRUE;
-//!        }
-//!    };
-//!}
-
 class UnicodeOrFilter : public UnicodeFilter {
    UnicodeFilter* filt1;
    UnicodeFilter* filt2;
@ -108,32 +114,17 @@ UnicodeFilter* UnicodeOrFilter::clone() const { return new UnicodeOrFilter(*this
 * circuit OR of the result of the two given filters.  That is, if
 * <tt>f.contains()</tt> is <tt>true</tt>, then <tt>g.contains()</tt> is
 * not called, and <tt>contains()</tt> returns <tt>true</tt>.
- *
- * <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
 */
-UnicodeFilter* UnicodeFilterLogic::createOr(const UnicodeFilter& f,
-                                            const UnicodeFilter& g) {
-    return new UnicodeOrFilter(f.clone(), g.clone());
+UnicodeFilter* UnicodeFilterLogic::createOr(const UnicodeFilter* f,
+                                            const UnicodeFilter* g) {
+    if (f == 0) {
+        if (g == 0) {
+            return NULL; // neither input restricts anything
+        }
+        return g->clone(); // NOTE(review): if NULL means "contains everything", f OR g would contain everything, not just g -- confirm intended
+    }
+    if (g == 0) {
+        return f->clone(); // NOTE(review): same question as the f == 0 case above -- confirm intended
+    }
+    return new UnicodeOrFilter(f->clone(), g->clone()); // both inputs are cloned, never adopted
 }
-
-/**
- * Returns a <tt>UnicodeFilter</tt> that implements a short
- * circuit OR of the result of the given filters.  That is, if
- * <tt>f[i].contains()</tt> is <tt>false</tt>, then
- * <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
- * <tt>contains()</tt> returns <tt>true</tt>.
- */
-//!UnicodeFilter* UnicodeFilterLogic::or(const UnicodeFilter** f) {
-//!    return new UnicodeFilter() {
-//!        public bool_t contains(UChar c) {
-//!            for (int32_t i=0; i<f.length; ++i) {
-//!                if (f[i].contains(c)) {
-//!                    return TRUE;
-//!                }
-//!            }
-//!            return FALSE;
-//!        }
-//!    };
-//!}
-
-// TODO: Add nand() & nor() for convenience, if needed.