ICU-199 compound ID support

X-SVN-Rev: 585
This commit is contained in:
Alan Liu 2000-01-14 21:15:25 +00:00
parent cc305ed674
commit d2185dcbe6
6 changed files with 131 additions and 60 deletions

View File

@ -11,12 +11,6 @@
#include "unicode/unifilt.h"
#include "unicode/unifltlg.h"
CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
UnicodeFilter* adoptedFilter) :
Transliterator(ID,adoptedFilter),
trans(0), count(0) {
}
/**
* Constructs a new compound transliterator given an array of
* transliterators. The array of transliterators may be of any
@ -29,13 +23,80 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
CompoundTransliterator::CompoundTransliterator(
Transliterator* const transliterators[],
int32_t transCount,
int32_t count,
UnicodeFilter* adoptedFilter) :
Transliterator(ID,adoptedFilter),
Transliterator(joinIDs(transliterators, count), adoptedFilter),
trans(0), count(0) {
setTransliterators(transliterators, transCount);
setTransliterators(transliterators, count);
}
/**
 * Constructs a compound transliterator from an ID of the form
 * "ID;ID;...", creating one component transliterator for each
 * ';'-delimited piece via createInstance().
 * @param ID compound ID of the above form; it is also passed as-is
 * to the Transliterator base constructor.
 * @param direction if FORWARD, the pieces are instantiated in the
 * order they appear in ID. Otherwise the pieces are taken in reverse
 * order, and each one is instantiated with the given direction
 * (i.e. the inverse of each ID is used).
 * @param adoptedFilter forwarded to the Transliterator base
 * constructor.
 *
 * NOTE(review): the pointer returned by createInstance() is stored
 * without a null check, and createInstance() returns 0 when it cannot
 * build the requested transliterator -- confirm that callers and the
 * rest of this class tolerate null components.
 */
CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
                                               Transliterator::Direction direction,
                                               UnicodeFilter* adoptedFilter) :
    Transliterator(ID, adoptedFilter) {
    // Later, add "rule1[filter];rule2...

    // split() sets the member 'count' to the number of pieces and
    // hands back a heap array that is released below.
    UnicodeString* ids = split(ID, ';', count);
    trans = new Transliterator*[count];
    for (int32_t slot = 0; slot < count; ++slot) {
        // In the non-forward case walk the ID list back to front.
        int32_t src = slot;
        if (direction != FORWARD) {
            src = count - 1 - slot;
        }
        trans[slot] = createInstance(ids[src], direction);
    }
    delete[] ids;
}
/**
 * Builds a compound ID from a list of transliterators by
 * concatenating their IDs with ';' between consecutive entries.
 * Equivalent to the perlish expression
 * join(';', map($_.getID(), transliterators)).
 * @param transliterators array holding at least count transliterators;
 * each entry is dereferenced, so none may be null.
 * @param count number of entries to join; a count of zero (or less)
 * yields an empty string.
 * @return the joined ID, by value.
 */
UnicodeString CompoundTransliterator::joinIDs(Transliterator* const transliterators[],
                                              int32_t count) {
    UnicodeString joined;
    if (count > 0) {
        // The first ID carries no leading delimiter...
        joined.append(transliterators[0]->getID());
        // ...every later one is preceded by ';'.
        for (int32_t k = 1; k < count; ++k) {
            joined.append((UChar)';');
            joined.append(transliterators[k]->getID());
        }
    }
    return joined;
}
/**
 * Splits a string at every occurrence of a divider character, in the
 * manner of JavaScript's split: N dividers always produce N+1 pieces,
 * and pieces may be empty (an empty input yields one empty piece).
 * @param s the string to split
 * @param divider the character separating the pieces; it is not
 * included in any piece
 * @param count receives the number of pieces in the returned array
 * @return a newly allocated array of count strings; the caller owns
 * it and must release it with delete[]
 */
UnicodeString* CompoundTransliterator::split(const UnicodeString& s,
                                             UChar divider,
                                             int32_t& count) {
    const int32_t len = s.length();
    int32_t pos;

    // First pass: one piece, plus one more per divider found.
    count = 1;
    for (pos = 0; pos < len; ++pos) {
        if (s.charAt(pos) == divider) {
            ++count;
        }
    }

    // Second pass: copy out the text between consecutive dividers.
    UnicodeString* parts = new UnicodeString[count];
    int32_t start = 0;  // index of the first character of the current piece
    int32_t slot = 0;   // next element of parts to fill
    for (pos = 0; pos < len; ++pos) {
        if (s.charAt(pos) != divider) {
            continue;
        }
        s.extractBetween(start, pos, parts[slot]);
        ++slot;
        start = pos + 1;
    }

    // Whatever follows the last divider (possibly nothing) is the final piece.
    s.extractBetween(start, len, parts[slot]);
    return parts;
}
/**

View File

@ -23,6 +23,7 @@
#include "unicode/unitohex.h"
#include "unicode/nultrans.h"
#include "unicode/putil.h"
#include "unicode/cpdtrans.h"
/**
* Dictionary of known transliterators. Keys are <code>String</code>
@ -522,15 +523,7 @@ void Transliterator::adoptFilter(UnicodeFilter* filterToAdopt) {
* @see #registerInstance
*/
Transliterator* Transliterator::createInverse(void) const {
int32_t i = ID.indexOf((UChar)'-');
if (i >= 0) {
UnicodeString inverseID, right;
ID.extractBetween(i+1, ID.length(), inverseID);
ID.extractBetween(0, i, right);
inverseID.append((UChar)'-').append(right);
return _createInstance(inverseID);
}
return 0;
return Transliterator::createInstance(ID, REVERSE);
}
/**
@ -545,8 +538,24 @@ Transliterator* Transliterator::createInverse(void) const {
* @see #getAvailableIDs
* @see #getID
*/
Transliterator* Transliterator::createInstance(const UnicodeString& ID) {
Transliterator* t = _createInstance(ID);
Transliterator* Transliterator::createInstance(const UnicodeString& ID,
                                               Transliterator::Direction dir) {
    // A ';' marks a compound ID ("ID;ID;..."); CompoundTransliterator
    // takes care of splitting it and applying the direction.
    if (ID.indexOf(';') >= 0) {
        return new CompoundTransliterator(ID, dir, 0);
    }

    // Any direction other than REVERSE: look the ID up as given.
    if (dir != REVERSE) {
        return _createInstance(ID);
    }

    // REVERSE: an ID of the form "A-B" is inverted to "B-A", splitting
    // at the first '-'. An ID without a '-' has no inverse, so the
    // result is 0.
    const int32_t dash = ID.indexOf((UChar)'-');
    if (dash < 0) {
        return 0;
    }
    UnicodeString source, target;
    ID.extractBetween(0, dash, source);
    ID.extractBetween(dash + 1, ID.length(), target);
    target.append((UChar)'-').append(source);
    return _createInstance(target);
}

View File

@ -32,7 +32,7 @@
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: cpdtrans.h,v $ $Revision: 1.1 $ $Date: 1999/12/28 23:54:20 $
* @version $RCSfile: cpdtrans.h,v $ $Revision: 1.2 $ $Date: 2000/01/14 21:15:13 $
*/
class U_I18N_API CompoundTransliterator : public Transliterator {
@ -54,12 +54,12 @@ public:
* altered by this transliterator. If <tt>filter</tt> is
* <tt>null</tt> then no filtering is applied.
*/
CompoundTransliterator(const UnicodeString& ID,
Transliterator* const transliterators[],
CompoundTransliterator(Transliterator* const transliterators[],
int32_t count,
UnicodeFilter* adoptedFilter = 0);
CompoundTransliterator(const UnicodeString& ID,
Direction dir = FORWARD,
UnicodeFilter* adoptedFilter = 0);
/**
@ -128,6 +128,21 @@ public:
private:
/**
 * Return the IDs of the given list of transliterators, concatenated
 * with ';' delimiting them. Equivalent to the perlish expression
 * join(';', map($_.getID(), transliterators)).
 *
 * This is a static member because the array-taking constructor calls
 * it in its initializer list to compute the ID passed to the
 * Transliterator base class, i.e. before the base class subobject has
 * been constructed. Calling a non-static member function at that
 * point is undefined behavior; the function only uses its arguments,
 * so it does not need an object.
 * @param transliterators array of at least count transliterators
 * @param count number of entries of the array to join
 * @return the joined ID
 */
static UnicodeString joinIDs(Transliterator* const transliterators[],
                             int32_t count);
/**
* Splits a string, as in JavaScript
*/
UnicodeString* split(const UnicodeString& s,
UChar divider,
int32_t& count);
void freeTransliterators(void);
};
#endif

View File

@ -221,24 +221,6 @@ class U_I18N_API RuleBasedTransliterator : public Transliterator {
public:
/**
* Direction constant passed to constructor to specify whether forward
* or reverse rules are parsed. The other rules are ignored.
*/
enum Direction {
/**
* Direction constant passed to constructor to create a transliterator
* using the forward rules.
*/
FORWARD,
/**
* Direction constant passed to constructor to create a transliterator
* using the reverse rules.
*/
REVERSE
};
/**
* Constructs a new transliterator from the given rules.
* @param rules rules, separated by ';'

View File

@ -222,6 +222,17 @@ class U_I18N_API Transliterator {
public:
/**
* Direction constant indicating the direction in a transliterator, e.g.,
* the forward or reverse rules of a RuleBasedTransliterator. An "A-B"
* transliterator transliterates A to B when operating in the forward
* direction, and B to A when operating in the reverse direction.
*/
enum Direction {
/**
* Forward direction: an "A-B" transliterator transliterates A to B.
* This is the default direction of createInstance().
*/
FORWARD,
/**
* Reverse direction: an "A-B" transliterator transliterates B to A.
*/
REVERSE
};
enum {
/**
* In the <code>keyboardTransliterate()</code>
@ -719,7 +730,7 @@ public:
* transliterator is registered.
* @see #registerInstance
*/
virtual Transliterator* createInverse(void) const;
Transliterator* createInverse(void) const;
/**
* Returns a <code>Transliterator</code> object given its ID.
@ -733,7 +744,8 @@ public:
* @see #getAvailableIDs
* @see #getID
*/
static Transliterator* createInstance(const UnicodeString& ID);
static Transliterator* createInstance(const UnicodeString& ID,
Direction dir = FORWARD);
private:

View File

@ -371,21 +371,13 @@ void TransliteratorTest::TestArabic(void) {
* some strings that should come out unchanged.
*/
void TransliteratorTest::TestCompoundKana(void) {
Transliterator* kana = Transliterator::createInstance("Latin-Kana");
Transliterator* rkana = Transliterator::createInstance("Kana-Latin");
Transliterator* trans[] = { kana, rkana };
if (kana == 0 || rkana == 0) {
errln("FAIL: construction failed");
delete kana;
delete rkana;
return;
Transliterator* t = Transliterator::createInstance("Latin-Kana;Kana-Latin");
if (t == 0) {
errln("FAIL: construction of Latin-Kana;Kana-Latin failed");
} else {
expect(*t, "aaaaa", "aaaaa");
delete t;
}
Transliterator* t = new CompoundTransliterator("<ID>", trans, 2);
expect(*t, "aaaaa", "aaaaa");
delete t;
delete kana;
delete rkana;
}
/**
@ -406,13 +398,13 @@ void TransliteratorTest::TestCompoundHex(void) {
// Do some basic tests of b
expect(*b, "\\u0030\\u0031", "01");
Transliterator* ab = new CompoundTransliterator("ab", transab, 2);
Transliterator* ab = new CompoundTransliterator(transab, 2);
UnicodeString s("abcde");
expect(*ab, s, s);
UnicodeString str(s);
a->transliterate(str);
Transliterator* ba = new CompoundTransliterator("ba", transba, 2);
Transliterator* ba = new CompoundTransliterator(transba, 2);
expect(*ba, str, str);
delete ab;