ICU-264 improve filter handling of CompoundTransliterator

X-SVN-Rev: 722
This commit is contained in:
Alan Liu 2000-02-05 00:24:24 +00:00
parent 7f7b2d90f3
commit 7b6b7df37a
6 changed files with 174 additions and 121 deletions

View File

@ -28,7 +28,7 @@ CompoundTransliterator::CompoundTransliterator(
int32_t count,
UnicodeFilter* adoptedFilter) :
Transliterator(joinIDs(transliterators, count), adoptedFilter),
trans(0), count(0) {
trans(0), count(0), filters(0) {
setTransliterators(transliterators, count);
}
@ -42,9 +42,8 @@ CompoundTransliterator::CompoundTransliterator(
CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
Transliterator::Direction direction,
UnicodeFilter* adoptedFilter) :
Transliterator(ID, adoptedFilter) {
// changed MED
// Later, add "rule1[filter];rule2...
Transliterator(ID, 0), // set filter to 0 here!
filters(0) {
UnicodeString* list = split(ID, ';', count);
trans = new Transliterator*[count];
for (int32_t i = 0; i < count; ++i) {
@ -53,6 +52,7 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
}
delete[] list;
computeMaximumContextLength();
adoptFilter(adoptedFilter);
}
/**
@ -105,7 +105,7 @@ UnicodeString* CompoundTransliterator::split(const UnicodeString& s,
* Copy constructor.
*/
CompoundTransliterator::CompoundTransliterator(const CompoundTransliterator& t) :
    Transliterator(t), trans(0), filters(0), count(0) {
    // Exactly one initializer list: every owned array pointer starts
    // out null and count starts at zero (listed in member declaration
    // order), so the assignment operator finds nothing to release
    // before it copies t's contents.
    *this = t;
}
@ -119,9 +119,14 @@ CompoundTransliterator::~CompoundTransliterator() {
/**
 * Deletes every component transliterator and, when the filters array
 * exists, every filter stored in it; then releases both arrays and
 * resets this object to the empty state (trans == 0, filters == 0,
 * count == 0).
 */
void CompoundTransliterator::freeTransliterators(void) {
    int32_t i;
    for (i=0; i<count; ++i) {
        delete trans[i];
    }
    // filters is allocated lazily, so it may legitimately be null.
    if (filters != 0) {
        for (i=0; i<count; ++i) {
            delete filters[i];
        }
    }
    // Release each array exactly once; deleting the same pointer a
    // second time would be undefined behavior.
    delete[] trans;
    delete[] filters;
    trans = 0;
    filters = 0;
    count = 0;
}
@ -135,14 +140,23 @@ CompoundTransliterator& CompoundTransliterator::operator=(
for (i=0; i<count; ++i) {
delete trans[i];
trans[i] = 0;
if (filters != 0) {
delete filters[i];
filters[i] = 0;
}
}
if (t.count > count) {
delete[] trans;
trans = new Transliterator*[t.count];
delete[] filters;
filters = (t.filter == 0) ? 0 : new UnicodeFilter*[t.count];
}
count = t.count;
for (i=0; i<count; ++i) {
trans[i] = t.trans[i]->clone();
if (t.filters != 0) {
filters[i] = t.filters[i]->clone();
}
}
return *this;
}
@ -171,7 +185,6 @@ const Transliterator& CompoundTransliterator::getTransliterator(int32_t index) c
return *trans[index];
}
void CompoundTransliterator::setTransliterators(Transliterator* const transliterators[],
int32_t transCount) {
Transliterator** a = new Transliterator*[transCount];
@ -183,10 +196,64 @@ void CompoundTransliterator::setTransliterators(Transliterator* const transliter
/**
 * Replaces the component transliterators of this compound with the
 * given array.  The array pointer itself is stored (it is not copied),
 * and the previously held components are freed.  The filter that was
 * in force on the compound is taken out and handed back through
 * adoptFilter() once the new components are installed.
 *
 * @param adoptedTransliterators array stored directly in trans
 * @param transCount number of elements in adoptedTransliterators
 */
void CompoundTransliterator::adoptTransliterators(Transliterator* adoptedTransliterators[],
int32_t transCount) {
// First free trans[] and set count to zero. Once this is done,
// orphan the filter. Set up the new trans[], and call
// adoptFilter() to fix up the filters in trans[].
freeTransliterators();
// count is zero here, so the adoptFilter(0) issued by orphanFilter()
// skips its per-component work.
// NOTE(review): f is only usable below if orphanFilter() returns a
// live object -- confirm against Transliterator::adoptFilter().
UnicodeFilter *f = orphanFilter();
trans = adoptedTransliterators;
count = transCount;
computeMaximumContextLength();
// Hand the compound filter back so it is applied to the new components.
adoptFilter(f);
}
/**
 * Override of Transliterator::adoptFilter().  Rewrites the filter of
 * every component transliterator to be the logical AND of the compound
 * filter and the component's own filter.  Each component's own filter
 * is remembered in the filters array so that it can be put back later.
 */
void CompoundTransliterator::adoptFilter(UnicodeFilter* f) {
    /*
     * Let F be the filter of the compound as a whole.  While F is in
     * force, each component filter c is replaced by c' = F & c.
     *
     * The object is always in one of two states, and this method must
     * ensure that it stays in one of them:
     *   1. getFilter() != 0
     *        filters[] holds the original component filters
     *        trans[]   holds the createAnd() filters
     *   2. getFilter() == 0
     *        filters[] is unallocated or empty
     *        trans[]   holds the original component filters
     */
    int32_t k;
    if (count > 0 && f != 0) {
        if (getFilter() == 0) {
            // Coming from state 2: move the component filters out of
            // trans[] and into filters[], allocating it on first use.
            if (filters == 0) {
                filters = new UnicodeFilter*[count];
            }
            for (k = 0; k < count; ++k) {
                filters[k] = trans[k]->orphanFilter();
            }
        }
        // Install the combined filter on every component.
        for (k = 0; k < count; ++k) {
            trans[k]->adoptFilter(UnicodeFilterLogic::createAnd(f, filters[k]));
        }
    } else if (count > 0 && getFilter() != 0 && filters != 0) {
        // The compound filter is being removed: give each component
        // its original filter back and empty filters[].
        for (k = 0; k < count; ++k) {
            trans[k]->adoptFilter(filters[k]);
            filters[k] = 0;
        }
    }
    Transliterator::adoptFilter(f);
}
/**
@ -252,28 +319,7 @@ void CompoundTransliterator::handleTransliterate(Replaceable& text, Position& in
return; // Short circuit for empty compound transliterators
}
/**
* One more wrinkle. If there is a filter F for the compound
* transliterator as a whole, then we need to modify every
* non-null filter f in the chain to be f' = F & f. Then,
* when we're done, we restore the original filters.
*
* A possible future optimization is to change f to f' at
* construction time, but then if anyone else is using the
* transliterators in the chain outside of this context, they
* will get unexpected results.
*/
const UnicodeFilter* F = getFilter();
int32_t i;
UnicodeFilter** f = 0;
if (F != 0) {
f = new UnicodeFilter*[count];
for (i=0; i<count; ++i) {
f[i] = trans[i]->getFilter()->clone();
trans[i]->adoptFilter(UnicodeFilterLogic::createAnd(*F, *f[i]));
}
}
int32_t cursor = index.cursor;
int32_t limit = index.limit;
int32_t globalLimit = limit;
@ -297,14 +343,6 @@ void CompoundTransliterator::handleTransliterate(Replaceable& text, Position& in
// transliterator left it. Limit needs to be put back
// where it was, modulo adjustments for deletions/insertions.
index.limit = globalLimit;
// Fixup the transliterator filters, if we had to modify them.
if (f != 0) {
for (i=0; i<count; ++i) {
trans[i]->adoptFilter(f[i]);
}
delete[] f;
}
}
/**

View File

@ -473,6 +473,19 @@ const UnicodeFilter* Transliterator::getFilter(void) const {
return filter;
}
/**
 * Returns the filter used by this transliterator, or
 * <tt>NULL</tt> if this transliterator uses no filter. The
 * caller must eventually delete the result. After this call,
 * this transliterator's filter is set to <tt>NULL</tt>.
 *
 * NOTE(review): the pointer handed back is read from the filter
 * member before adoptFilter(0) runs.  That is only safe if
 * adoptFilter() does not delete the filter it replaces; the body of
 * Transliterator::adoptFilter() is not visible here -- confirm.
 */
UnicodeFilter* Transliterator::orphanFilter(void) {
// Remember the current filter; this is the value returned to the caller.
UnicodeFilter *result = filter;
// MUST go through adoptFilter in case latter is overridden
adoptFilter(0);
return result;
}
/**
* Changes the filter used by this transliterator. If the filter
* is set to <tt>null</tt> then no filtering will occur.

View File

@ -32,12 +32,17 @@
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: cpdtrans.h,v $ $Revision: 1.5 $ $Date: 2000/01/19 19:02:10 $
* @version $RCSfile: cpdtrans.h,v $ $Revision: 1.6 $ $Date: 2000/02/05 00:23:56 $
*/
class U_I18N_API CompoundTransliterator : public Transliterator {
Transliterator** trans;
/**
* Array of original filters associated with transliterators.
*/
UnicodeFilter** filters;
int32_t count;
public:
@ -101,6 +106,14 @@ public:
void adoptTransliterators(Transliterator* adoptedTransliterators[],
int32_t count);
/**
* Override Transliterator. Modify the transliterators that make up
* this compound transliterator so their filters are the logical AND
* of this transliterator's filter and their own. Original filters
* are kept in the filters array.
*/
virtual void adoptFilter(UnicodeFilter* f);
/**
* Implements {@link Transliterator#handleTransliterate}.
*/

View File

@ -671,11 +671,19 @@ public:
UnicodeString& result);
/**
* Returns the filter used by this transliterator, or <tt>null</tt>
* Returns the filter used by this transliterator, or <tt>NULL</tt>
* if this transliterator uses no filter.
*/
virtual const UnicodeFilter* getFilter(void) const;
/**
* Returns the filter used by this transliterator, or <tt>NULL</tt> if this
* transliterator uses no filter. The caller must eventually delete the
* result. After this call, this transliterator's filter is set to
* <tt>NULL</tt>. Calls adoptFilter().
*/
UnicodeFilter* orphanFilter(void);
/**
* Changes the filter used by this transliterator. If the filter
* is set to <tt>null</tt> then no filtering will occur.

View File

@ -20,6 +20,12 @@ class UnicodeFilter;
* filter objects that perform logical inversion (<tt>not</tt>),
* intersection (<tt>and</tt>), or union (<tt>or</tt>) of the given
* filter objects.
*
* If a UnicodeFilter* f is passed in, where f == NULL, then that
* is treated as a filter that contains all Unicode characters.
* Therefore, createNot(NULL) returns a filter that contains no
* Unicode characters. Likewise, createAnd(g, NULL) returns g->clone(),
* and createAnd(NULL, NULL) returns NULL.
*/
class U_I18N_API UnicodeFilterLogic {
@ -28,50 +34,34 @@ public:
/**
* Returns a <tt>UnicodeFilter</tt> that implements the inverse of
* the given filter.
* @param f may be NULL
* @result always non-NULL
*/
static UnicodeFilter* createNot(const UnicodeFilter& f);
static UnicodeFilter* createNot(const UnicodeFilter* f);
/**
* Returns a <tt>UnicodeFilter</tt> that implements a short
* circuit AND of the result of the two given filters. That is,
* if <tt>f.contains()</tt> is <tt>false</tt>, then <tt>g.contains()</tt>
* is not called, and <tt>contains()</tt> returns <tt>false</tt>.
*
* <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
* @param f may be NULL
* @param g may be NULL
* @result will be NULL if and only if f == g == NULL
*/
static UnicodeFilter* createAnd(const UnicodeFilter& f,
const UnicodeFilter& g);
/**
* Returns a <tt>UnicodeFilter</tt> that implements a short
* circuit AND of the result of the given filters. That is, if
* <tt>f[i].contains()</tt> is <tt>false</tt>, then
* <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
* <tt>contains()</tt> returns <tt>false</tt>.
*/
// static UnicodeFilter* and(const UnicodeFilter** f);
static UnicodeFilter* createAnd(const UnicodeFilter* f,
const UnicodeFilter* g);
/**
* Returns a <tt>UnicodeFilter</tt> that implements a short
* circuit OR of the result of the two given filters. That is, if
* <tt>f.contains()</tt> is <tt>true</tt>, then <tt>g.contains()</tt> is
* not called, and <tt>contains()</tt> returns <tt>true</tt>.
*
* <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
* @param f may be NULL
* @param g may be NULL
* @result will be NULL if and only if f == g == NULL
*/
static UnicodeFilter* createOr(const UnicodeFilter& f,
const UnicodeFilter& g);
/**
* Returns a <tt>UnicodeFilter</tt> that implements a short
* circuit OR of the result of the given filters. That is, if
* <tt>f[i].contains()</tt> is <tt>true</tt>, then
* <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
* <tt>contains()</tt> returns <tt>true</tt>.
*/
// static UnicodeFilter* or(const UnicodeFilter** f);
// TODO: Add nand() & nor() for convenience, if needed.
static UnicodeFilter* createOr(const UnicodeFilter* f,
const UnicodeFilter* g);
private:
// Disallow instantiation

View File

@ -10,6 +10,21 @@
#include "unicode/unifltlg.h"
#include "unicode/unifilt.h"
/**
* A NullFilter always returns a fixed value, either TRUE or FALSE.
* A filter value of 0 (that is, a UnicodeFilter* f, where f == 0)
* is equivalent to a NullFilter(TRUE).
*/
class NullFilter : public UnicodeFilter {
bool_t result;
public:
NullFilter(bool_t r) { result = r; }
NullFilter(const NullFilter& f) { result = f.result; }
virtual ~NullFilter() {}
virtual bool_t contains(UChar c) const { return result; }
virtual UnicodeFilter* clone() const { return new NullFilter(*this); }
};
class UnicodeNotFilter : public UnicodeFilter {
UnicodeFilter* filt;
public:
@ -30,8 +45,12 @@ UnicodeFilter* UnicodeNotFilter::clone() const { return new UnicodeNotFilter(*th
* Returns a <tt>UnicodeFilter</tt> that implements the inverse of
* the given filter.
*/
UnicodeFilter* UnicodeFilterLogic::createNot(const UnicodeFilter& f) {
return new UnicodeNotFilter(f.clone());
UnicodeFilter* UnicodeFilterLogic::createNot(const UnicodeFilter* f) {
    // Usual case: wrap a private copy of f in an inverting filter.
    if (f != 0) {
        return new UnicodeNotFilter(f->clone());
    }
    // A null filter stands for "contains every character", so its
    // inverse is the constant filter that contains nothing.
    return new NullFilter(FALSE);
}
class UnicodeAndFilter : public UnicodeFilter {
@ -57,34 +76,21 @@ UnicodeFilter* UnicodeAndFilter::clone() const { return new UnicodeAndFilter(*th
* circuit AND of the result of the two given filters. That is,
* if <tt>f.contains()</tt> is <tt>false</tt>, then <tt>g.contains()</tt>
* is not called, and <tt>contains()</tt> returns <tt>false</tt>.
*
* <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
*/
UnicodeFilter* UnicodeFilterLogic::createAnd(const UnicodeFilter& f,
const UnicodeFilter& g) {
return new UnicodeAndFilter(f.clone(), g.clone());
UnicodeFilter* UnicodeFilterLogic::createAnd(const UnicodeFilter* f,
                                             const UnicodeFilter* g) {
    // Both operands present: build an AND node over private copies.
    if (f != 0 && g != 0) {
        return new UnicodeAndFilter(f->clone(), g->clone());
    }
    // At most one operand is present.  A null operand is dropped from
    // the conjunction, so the result is a copy of the surviving
    // operand, or NULL when there is none.
    const UnicodeFilter* present = (f != 0) ? f : g;
    if (present == 0) {
        return NULL;
    }
    return present->clone();
}
/**
* Returns a <tt>UnicodeFilter</tt> that implements a short
* circuit AND of the result of the given filters. That is, if
* <tt>f[i].contains()</tt> is <tt>false</tt>, then
* <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
* <tt>contains()</tt> returns <tt>false</tt>.
*/
//!UnicodeFilter* UnicodeFilterLogic::and(const UnicodeFilter** f) {
//! return new UnicodeFilter() {
//! public bool_t contains(UChar c) {
//! for (int32_t i=0; i<f.length; ++i) {
//! if (!f[i].contains(c)) {
//! return FALSE;
//! }
//! }
//! return TRUE;
//! }
//! };
//!}
class UnicodeOrFilter : public UnicodeFilter {
UnicodeFilter* filt1;
UnicodeFilter* filt2;
@ -108,32 +114,17 @@ UnicodeFilter* UnicodeOrFilter::clone() const { return new UnicodeOrFilter(*this
* circuit OR of the result of the two given filters. That is, if
* <tt>f.contains()</tt> is <tt>true</tt>, then <tt>g.contains()</tt> is
* not called, and <tt>contains()</tt> returns <tt>true</tt>.
*
* <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
*/
UnicodeFilter* UnicodeFilterLogic::createOr(const UnicodeFilter& f,
const UnicodeFilter& g) {
return new UnicodeOrFilter(f.clone(), g.clone());
UnicodeFilter* UnicodeFilterLogic::createOr(const UnicodeFilter* f,
                                            const UnicodeFilter* g) {
    // A null filter stands for "contains every character".
    if (f == 0 && g == 0) {
        // all | all == all, which is represented by NULL.
        return NULL;
    }
    if (f == 0 || g == 0) {
        // x | all == all.  Returning a copy of the non-null operand
        // would wrongly narrow the union to x, so answer with a
        // constant-TRUE filter instead.  The result stays non-NULL,
        // keeping "NULL if and only if f == g == NULL" true.
        return new NullFilter(TRUE);
    }
    // Both operands present: build an OR node over private copies.
    return new UnicodeOrFilter(f->clone(), g->clone());
}
/**
* Returns a <tt>UnicodeFilter</tt> that implements a short
* circuit OR of the result of the given filters. That is, if
* <tt>f[i].contains()</tt> is <tt>true</tt>, then
* <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
* <tt>contains()</tt> returns <tt>true</tt>.
*/
//!UnicodeFilter* UnicodeFilterLogic::or(const UnicodeFilter** f) {
//! return new UnicodeFilter() {
//! public bool_t contains(UChar c) {
//! for (int32_t i=0; i<f.length; ++i) {
//! if (f[i].contains(c)) {
//! return TRUE;
//! }
//! }
//! return FALSE;
//! }
//! };
//!}
// TODO: Add nand() & nor() for convenience, if needed.