ICU-264 improve filter handling of CompoundTransliterator
X-SVN-Rev: 722
This commit is contained in:
parent
7f7b2d90f3
commit
7b6b7df37a
@ -28,7 +28,7 @@ CompoundTransliterator::CompoundTransliterator(
|
||||
int32_t count,
|
||||
UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(joinIDs(transliterators, count), adoptedFilter),
|
||||
trans(0), count(0) {
|
||||
trans(0), count(0), filters(0) {
|
||||
setTransliterators(transliterators, count);
|
||||
}
|
||||
|
||||
@ -42,9 +42,8 @@ CompoundTransliterator::CompoundTransliterator(
|
||||
CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
|
||||
Transliterator::Direction direction,
|
||||
UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(ID, adoptedFilter) {
|
||||
// changed MED
|
||||
// Later, add "rule1[filter];rule2...
|
||||
Transliterator(ID, 0), // set filter to 0 here!
|
||||
filters(0) {
|
||||
UnicodeString* list = split(ID, ';', count);
|
||||
trans = new Transliterator*[count];
|
||||
for (int32_t i = 0; i < count; ++i) {
|
||||
@ -53,6 +52,7 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
|
||||
}
|
||||
delete[] list;
|
||||
computeMaximumContextLength();
|
||||
adoptFilter(adoptedFilter);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -105,7 +105,7 @@ UnicodeString* CompoundTransliterator::split(const UnicodeString& s,
|
||||
* Copy constructor.
|
||||
*/
|
||||
CompoundTransliterator::CompoundTransliterator(const CompoundTransliterator& t) :
|
||||
Transliterator(t), trans(0), count(0) {
|
||||
Transliterator(t), trans(0), count(0), filters(0) {
|
||||
*this = t;
|
||||
}
|
||||
|
||||
@ -119,9 +119,14 @@ CompoundTransliterator::~CompoundTransliterator() {
|
||||
void CompoundTransliterator::freeTransliterators(void) {
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
delete trans[i];
|
||||
if (filters != 0) {
|
||||
delete filters[i];
|
||||
}
|
||||
}
|
||||
delete[] trans;
|
||||
delete[] trans;
|
||||
delete[] filters;
|
||||
trans = 0;
|
||||
filters = 0;
|
||||
count = 0;
|
||||
}
|
||||
|
||||
@ -135,14 +140,23 @@ CompoundTransliterator& CompoundTransliterator::operator=(
|
||||
for (i=0; i<count; ++i) {
|
||||
delete trans[i];
|
||||
trans[i] = 0;
|
||||
if (filters != 0) {
|
||||
delete filters[i];
|
||||
filters[i] = 0;
|
||||
}
|
||||
}
|
||||
if (t.count > count) {
|
||||
delete[] trans;
|
||||
trans = new Transliterator*[t.count];
|
||||
delete[] filters;
|
||||
filters = (t.filter == 0) ? 0 : new UnicodeFilter*[t.count];
|
||||
}
|
||||
count = t.count;
|
||||
for (i=0; i<count; ++i) {
|
||||
trans[i] = t.trans[i]->clone();
|
||||
if (t.filters != 0) {
|
||||
filters[i] = t.filters[i]->clone();
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
@ -171,7 +185,6 @@ const Transliterator& CompoundTransliterator::getTransliterator(int32_t index) c
|
||||
return *trans[index];
|
||||
}
|
||||
|
||||
|
||||
void CompoundTransliterator::setTransliterators(Transliterator* const transliterators[],
|
||||
int32_t transCount) {
|
||||
Transliterator** a = new Transliterator*[transCount];
|
||||
@ -183,10 +196,64 @@ void CompoundTransliterator::setTransliterators(Transliterator* const transliter
|
||||
|
||||
void CompoundTransliterator::adoptTransliterators(Transliterator* adoptedTransliterators[],
                                                  int32_t transCount) {
    // Step 1: release the current chain.  freeTransliterators() deletes
    // every component and saved filter and leaves count at zero.
    freeTransliterators();

    // Step 2: detach the compound-level filter while the chain is empty,
    // so that the adoptFilter(0) issued by orphanFilter() has no
    // components to touch.
    UnicodeFilter* const compoundFilter = orphanFilter();

    // Step 3: take ownership of the caller's array.
    count = transCount;
    trans = adoptedTransliterators;
    computeMaximumContextLength();

    // Step 4: re-install the compound filter; adoptFilter() combines it
    // with the filter of each newly adopted component.
    adoptFilter(compoundFilter);
}
|
||||
|
||||
/**
|
||||
* Override Transliterator. Modify the transliterators that make up
|
||||
* this compound transliterator so their filters are the logical AND
|
||||
* of this transliterator's filter and their own. Original filters
|
||||
* are kept in the filters array.
|
||||
*/
|
||||
void CompoundTransliterator::adoptFilter(UnicodeFilter* f) {
    /*
     * When the compound transliterator as a whole has a filter F, each
     * component filter g is replaced by g' = F & g.
     *
     * The object is always in one of two states:
     *   1. getFilter() != 0
     *        filters[] holds the original component filters
     *        trans[]   holds the createAnd() combinations
     *   2. getFilter() == 0
     *        filters[] is unallocated or empty
     *        trans[]   holds the original component filters
     * This method must ensure that we stay in one of these states.
     */

    // Nothing to fix up for an empty chain; just record the new filter.
    if (count <= 0) {
        Transliterator::adoptFilter(f);
        return;
    }

    // Filter currently installed on the compound; the base-class call at
    // the bottom replaces it.
    const UnicodeFilter* const previous = getFilter();
    int32_t k;

    if (f != 0) {
        if (previous == 0) {
            // State 2 -> state 1: move the component filters out of
            // trans[] into filters[] so the originals are preserved.
            if (filters == 0) {
                filters = new UnicodeFilter*[count];
            }
            for (k = 0; k < count; ++k) {
                filters[k] = trans[k]->orphanFilter();
            }
        }
        // Give each component the AND of the new compound filter and its
        // own original filter.
        for (k = 0; k < count; ++k) {
            trans[k]->adoptFilter(UnicodeFilterLogic::createAnd(f, filters[k]));
        }
    } else if (previous != 0 && filters != 0) {
        // State 1 -> state 2: hand the original filters back to the
        // components and empty filters[].
        for (k = 0; k < count; ++k) {
            trans[k]->adoptFilter(filters[k]);
            filters[k] = 0;
        }
    }

    Transliterator::adoptFilter(f);
}
|
||||
|
||||
/**
|
||||
@ -252,28 +319,7 @@ void CompoundTransliterator::handleTransliterate(Replaceable& text, Position& in
|
||||
return; // Short circuit for empty compound transliterators
|
||||
}
|
||||
|
||||
/**
|
||||
* One more wrinkle. If there is a filter F for the compound
|
||||
* transliterator as a whole, then we need to modify every
|
||||
* non-null filter f in the chain to be f' = F & f. Then,
|
||||
* when we're done, we restore the original filters.
|
||||
*
|
||||
* A possible future optimization is to change f to f' at
|
||||
* construction time, but then if anyone else is using the
|
||||
* transliterators in the chain outside of this context, they
|
||||
* will get unexpected results.
|
||||
*/
|
||||
const UnicodeFilter* F = getFilter();
|
||||
int32_t i;
|
||||
UnicodeFilter** f = 0;
|
||||
if (F != 0) {
|
||||
f = new UnicodeFilter*[count];
|
||||
for (i=0; i<count; ++i) {
|
||||
f[i] = trans[i]->getFilter()->clone();
|
||||
trans[i]->adoptFilter(UnicodeFilterLogic::createAnd(*F, *f[i]));
|
||||
}
|
||||
}
|
||||
|
||||
int32_t cursor = index.cursor;
|
||||
int32_t limit = index.limit;
|
||||
int32_t globalLimit = limit;
|
||||
@ -297,14 +343,6 @@ void CompoundTransliterator::handleTransliterate(Replaceable& text, Position& in
|
||||
// transliterator left it. Limit needs to be put back
|
||||
// where it was, modulo adjustments for deletions/insertions.
|
||||
index.limit = globalLimit;
|
||||
|
||||
// Fixup the transliterator filters, if we had to modify them.
|
||||
if (f != 0) {
|
||||
for (i=0; i<count; ++i) {
|
||||
trans[i]->adoptFilter(f[i]);
|
||||
}
|
||||
delete[] f;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -473,6 +473,19 @@ const UnicodeFilter* Transliterator::getFilter(void) const {
|
||||
return filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the filter used by this transliterator, or
|
||||
* <tt>NULL</tt> if this transliterator uses no filter. The
|
||||
* caller must eventually delete the result. After this call,
|
||||
* this transliterator's filter is set to <tt>NULL</tt>.
|
||||
*/
|
||||
UnicodeFilter* Transliterator::orphanFilter(void) {
    // Capture the current filter so it can be handed to the caller.
    UnicodeFilter* const orphaned = filter;

    // Clear the filter through adoptFilter() rather than assigning the
    // member directly, so that a subclass override gets a chance to run.
    // NOTE(review): if adoptFilter() deletes the filter it previously
    // held, 'orphaned' dangles on return -- confirm against the
    // Transliterator::adoptFilter() implementation.
    adoptFilter(0);

    return orphaned;
}
|
||||
|
||||
/**
|
||||
* Changes the filter used by this transliterator. If the filter
|
||||
* is set to <tt>null</tt> then no filtering will occur.
|
||||
|
@ -32,12 +32,17 @@
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: cpdtrans.h,v $ $Revision: 1.5 $ $Date: 2000/01/19 19:02:10 $
|
||||
* @version $RCSfile: cpdtrans.h,v $ $Revision: 1.6 $ $Date: 2000/02/05 00:23:56 $
|
||||
*/
|
||||
class U_I18N_API CompoundTransliterator : public Transliterator {
|
||||
|
||||
Transliterator** trans;
|
||||
|
||||
/**
|
||||
* Array of original filters associated with transliterators.
|
||||
*/
|
||||
UnicodeFilter** filters;
|
||||
|
||||
int32_t count;
|
||||
|
||||
public:
|
||||
@ -101,6 +106,14 @@ public:
|
||||
void adoptTransliterators(Transliterator* adoptedTransliterators[],
|
||||
int32_t count);
|
||||
|
||||
/**
|
||||
* Override Transliterator. Modify the transliterators that make up
|
||||
* this compound transliterator so their filters are the logical AND
|
||||
* of this transliterator's filter and their own. Original filters
|
||||
* are kept in the filters array.
|
||||
*/
|
||||
virtual void adoptFilter(UnicodeFilter* f);
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
@ -671,11 +671,19 @@ public:
|
||||
UnicodeString& result);
|
||||
|
||||
/**
|
||||
* Returns the filter used by this transliterator, or <tt>null</tt>
|
||||
* Returns the filter used by this transliterator, or <tt>NULL</tt>
|
||||
* if this transliterator uses no filter.
|
||||
*/
|
||||
virtual const UnicodeFilter* getFilter(void) const;
|
||||
|
||||
/**
|
||||
* Returns the filter used by this transliterator, or <tt>NULL</tt> if this
|
||||
* transliterator uses no filter. The caller must eventually delete the
|
||||
* result. After this call, this transliterator's filter is set to
|
||||
* <tt>NULL</tt>. Calls adoptFilter().
|
||||
*/
|
||||
UnicodeFilter* orphanFilter(void);
|
||||
|
||||
/**
|
||||
* Changes the filter used by this transliterator. If the filter
|
||||
* is set to <tt>null</tt> then no filtering will occur.
|
||||
|
@ -20,6 +20,12 @@ class UnicodeFilter;
|
||||
* filter objects that perform logical inversion (<tt>not</tt>),
|
||||
* intersection (<tt>and</tt>), or union (<tt>or</tt>) of the given
|
||||
* filter objects.
|
||||
*
|
||||
* If a UnicodeFilter* f is passed in, where f == NULL, then that
|
||||
* is treated as a filter that contains all Unicode characters.
|
||||
* Therefore, createNot(NULL) returns a filter that contains no
|
||||
* Unicode characters. Likewise, createAnd(g, NULL) returns g->clone(),
|
||||
* and createAnd(NULL, NULL) returns NULL.
|
||||
*/
|
||||
class U_I18N_API UnicodeFilterLogic {
|
||||
|
||||
@ -28,50 +34,34 @@ public:
|
||||
/**
|
||||
* Returns a <tt>UnicodeFilter</tt> that implements the inverse of
|
||||
* the given filter.
|
||||
* @param f may be NULL
|
||||
* @result always non-NULL
|
||||
*/
|
||||
static UnicodeFilter* createNot(const UnicodeFilter& f);
|
||||
static UnicodeFilter* createNot(const UnicodeFilter* f);
|
||||
|
||||
/**
|
||||
* Returns a <tt>UnicodeFilter</tt> that implements a short
|
||||
* circuit AND of the result of the two given filters. That is,
|
||||
* if <tt>f.contains()</tt> is <tt>false</tt>, then <tt>g.contains()</tt>
|
||||
* is not called, and <tt>contains()</tt> returns <tt>false</tt>.
|
||||
*
|
||||
* <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
|
||||
* @param f may be NULL
|
||||
* @param g may be NULL
|
||||
* @result will be NULL if and only if f == g == NULL
|
||||
*/
|
||||
static UnicodeFilter* createAnd(const UnicodeFilter& f,
|
||||
const UnicodeFilter& g);
|
||||
|
||||
/**
|
||||
* Returns a <tt>UnicodeFilter</tt> that implements a short
|
||||
* circuit AND of the result of the given filters. That is, if
|
||||
* <tt>f[i].contains()</tt> is <tt>false</tt>, then
|
||||
* <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
|
||||
* <tt>contains()</tt> returns <tt>false</tt>.
|
||||
*/
|
||||
// static UnicodeFilter* and(const UnicodeFilter** f);
|
||||
static UnicodeFilter* createAnd(const UnicodeFilter* f,
|
||||
const UnicodeFilter* g);
|
||||
|
||||
/**
|
||||
* Returns a <tt>UnicodeFilter</tt> that implements a short
|
||||
* circuit OR of the result of the two given filters. That is, if
|
||||
* <tt>f.contains()</tt> is <tt>true</tt>, then <tt>g.contains()</tt> is
|
||||
* not called, and <tt>contains()</tt> returns <tt>true</tt>.
|
||||
*
|
||||
* <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
|
||||
* @param f may be NULL
|
||||
* @param g may be NULL
|
||||
* @result will be NULL if and only if f == g == NULL
|
||||
*/
|
||||
static UnicodeFilter* createOr(const UnicodeFilter& f,
|
||||
const UnicodeFilter& g);
|
||||
|
||||
/**
|
||||
* Returns a <tt>UnicodeFilter</tt> that implements a short
|
||||
* circuit OR of the result of the given filters. That is, if
|
||||
* <tt>f[i].contains()</tt> is <tt>false</tt>, then
|
||||
* <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
|
||||
* <tt>contains()</tt> returns <tt>true</tt>.
|
||||
*/
|
||||
// static UnicodeFilter* or(const UnicodeFilter** f);
|
||||
|
||||
// TODO: Add nand() & nor() for convenience, if needed.
|
||||
static UnicodeFilter* createOr(const UnicodeFilter* f,
|
||||
const UnicodeFilter* g);
|
||||
|
||||
private:
|
||||
// Disallow instantiation
|
||||
|
@ -10,6 +10,21 @@
|
||||
#include "unicode/unifltlg.h"
|
||||
#include "unicode/unifilt.h"
|
||||
|
||||
/**
|
||||
* A NullFilter always returns a fixed value, either TRUE or FALSE.
|
||||
* A filter value of 0 (that is, a UnicodeFilter* f, where f == 0)
|
||||
* is equivalent to a NullFilter(TRUE).
|
||||
*/
|
||||
class NullFilter : public UnicodeFilter {
|
||||
bool_t result;
|
||||
public:
|
||||
NullFilter(bool_t r) { result = r; }
|
||||
NullFilter(const NullFilter& f) { result = f.result; }
|
||||
virtual ~NullFilter() {}
|
||||
virtual bool_t contains(UChar c) const { return result; }
|
||||
virtual UnicodeFilter* clone() const { return new NullFilter(*this); }
|
||||
};
|
||||
|
||||
class UnicodeNotFilter : public UnicodeFilter {
|
||||
UnicodeFilter* filt;
|
||||
public:
|
||||
@ -30,8 +45,12 @@ UnicodeFilter* UnicodeNotFilter::clone() const { return new UnicodeNotFilter(*th
|
||||
* Returns a <tt>UnicodeFilter</tt> that implements the inverse of
|
||||
* the given filter.
|
||||
*/
|
||||
UnicodeFilter* UnicodeFilterLogic::createNot(const UnicodeFilter& f) {
|
||||
return new UnicodeNotFilter(f.clone());
|
||||
UnicodeFilter* UnicodeFilterLogic::createNot(const UnicodeFilter* f) {
|
||||
if (f == 0) {
|
||||
return new NullFilter(FALSE);
|
||||
} else {
|
||||
return new UnicodeNotFilter(f->clone());
|
||||
}
|
||||
}
|
||||
|
||||
class UnicodeAndFilter : public UnicodeFilter {
|
||||
@ -57,34 +76,21 @@ UnicodeFilter* UnicodeAndFilter::clone() const { return new UnicodeAndFilter(*th
|
||||
* circuit AND of the result of the two given filters. That is,
|
||||
* if <tt>f.contains()</tt> is <tt>false</tt>, then <tt>g.contains()</tt>
|
||||
* is not called, and <tt>contains()</tt> returns <tt>false</tt>.
|
||||
*
|
||||
* <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
|
||||
*/
|
||||
UnicodeFilter* UnicodeFilterLogic::createAnd(const UnicodeFilter& f,
|
||||
const UnicodeFilter& g) {
|
||||
return new UnicodeAndFilter(f.clone(), g.clone());
|
||||
UnicodeFilter* UnicodeFilterLogic::createAnd(const UnicodeFilter* f,
|
||||
const UnicodeFilter* g) {
|
||||
if (f == 0) {
|
||||
if (g == 0) {
|
||||
return NULL;
|
||||
}
|
||||
return g->clone();
|
||||
}
|
||||
if (g == 0) {
|
||||
return f->clone();
|
||||
}
|
||||
return new UnicodeAndFilter(f->clone(), g->clone());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a <tt>UnicodeFilter</tt> that implements a short
|
||||
* circuit AND of the result of the given filters. That is, if
|
||||
* <tt>f[i].contains()</tt> is <tt>false</tt>, then
|
||||
* <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
|
||||
* <tt>contains()</tt> returns <tt>false</tt>.
|
||||
*/
|
||||
//!UnicodeFilter* UnicodeFilterLogic::and(const UnicodeFilter** f) {
|
||||
//! return new UnicodeFilter() {
|
||||
//! public bool_t contains(UChar c) {
|
||||
//! for (int32_t i=0; i<f.length; ++i) {
|
||||
//! if (!f[i].contains(c)) {
|
||||
//! return FALSE;
|
||||
//! }
|
||||
//! }
|
||||
//! return TRUE;
|
||||
//! }
|
||||
//! };
|
||||
//!}
|
||||
|
||||
class UnicodeOrFilter : public UnicodeFilter {
|
||||
UnicodeFilter* filt1;
|
||||
UnicodeFilter* filt2;
|
||||
@ -108,32 +114,17 @@ UnicodeFilter* UnicodeOrFilter::clone() const { return new UnicodeOrFilter(*this
|
||||
* circuit OR of the result of the two given filters. That is, if
|
||||
* <tt>f.contains()</tt> is <tt>true</tt>, then <tt>g.contains()</tt> is
|
||||
* not called, and <tt>contains()</tt> returns <tt>true</tt>.
|
||||
*
|
||||
* <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
|
||||
*/
|
||||
UnicodeFilter* UnicodeFilterLogic::createOr(const UnicodeFilter& f,
|
||||
const UnicodeFilter& g) {
|
||||
return new UnicodeOrFilter(f.clone(), g.clone());
|
||||
UnicodeFilter* UnicodeFilterLogic::createOr(const UnicodeFilter* f,
|
||||
const UnicodeFilter* g) {
|
||||
if (f == 0) {
|
||||
if (g == 0) {
|
||||
return NULL;
|
||||
}
|
||||
return g->clone();
|
||||
}
|
||||
if (g == 0) {
|
||||
return f->clone();
|
||||
}
|
||||
return new UnicodeOrFilter(f->clone(), g->clone());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a <tt>UnicodeFilter</tt> that implements a short
|
||||
* circuit OR of the result of the given filters. That is, if
|
||||
* <tt>f[i].contains()</tt> is <tt>false</tt>, then
|
||||
* <tt>f[j].contains()</tt> is not called, where <tt>j > i</tt>, and
|
||||
* <tt>contains()</tt> returns <tt>true</tt>.
|
||||
*/
|
||||
//!UnicodeFilter* UnicodeFilterLogic::or(const UnicodeFilter** f) {
|
||||
//! return new UnicodeFilter() {
|
||||
//! public bool_t contains(UChar c) {
|
||||
//! for (int32_t i=0; i<f.length; ++i) {
|
||||
//! if (f[i].contains(c)) {
|
||||
//! return TRUE;
|
||||
//! }
|
||||
//! }
|
||||
//! return FALSE;
|
||||
//! }
|
||||
//! };
|
||||
//!}
|
||||
|
||||
// TODO: Add nand() & nor() for convenience, if needed.
|
||||
|
Loading…
Reference in New Issue
Block a user