ICU-199 compound ID support
X-SVN-Rev: 585
This commit is contained in:
parent
cc305ed674
commit
d2185dcbe6
@ -11,12 +11,6 @@
|
||||
#include "unicode/unifilt.h"
|
||||
#include "unicode/unifltlg.h"
|
||||
|
||||
/**
 * Constructs a compound transliterator that starts out with no
 * component transliterators: the component array pointer is null and
 * the component count is zero.
 * @param ID the identifier handed to the Transliterator base class
 * @param adoptedFilter the filter handed to the Transliterator base
 * class
 */
CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
                                               UnicodeFilter* adoptedFilter)
    : Transliterator(ID, adoptedFilter),
      trans(0),
      count(0)
{
}
|
||||
|
||||
/**
|
||||
* Constructs a new compound transliterator given an array of
|
||||
* transliterators. The array of transliterators may be of any
|
||||
@ -29,13 +23,80 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
CompoundTransliterator::CompoundTransliterator(
                           Transliterator* const transliterators[],
                           int32_t count,
                           UnicodeFilter* adoptedFilter) :
    // The compound's ID is not supplied by the caller; it is built from
    // the component IDs as "ID1;ID2;..." by joinIDs().
    // NOTE(review): joinIDs() is declared as a non-static member, yet it
    // is invoked here before the Transliterator base has been
    // constructed. It does not touch any member data, but it should be
    // declared static to make this call well-defined -- confirm and fix
    // in the class declaration.
    Transliterator(joinIDs(transliterators, count), adoptedFilter),
    trans(0), count(0) {
    // Inside the body the parameter 'count' hides the member of the same
    // name, so the caller-supplied length is what gets passed on here.
    setTransliterators(transliterators, count);
}
|
||||
|
||||
/**
|
||||
* Splits an ID of the form "ID;ID;..." into a compound using each
|
||||
* of the IDs.
|
||||
* @param ID of above form
|
||||
* @param forward if false, does the list in reverse order, and
|
||||
* takes the inverse of each ID.
|
||||
*/
|
||||
CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
|
||||
Transliterator::Direction direction,
|
||||
UnicodeFilter* adoptedFilter) :
|
||||
Transliterator(ID, adoptedFilter) {
|
||||
// changed MED
|
||||
// Later, add "rule1[filter];rule2...
|
||||
UnicodeString* list = split(ID, ';', count);
|
||||
trans = new Transliterator*[count];
|
||||
for (int32_t i = 0; i < count; ++i) {
|
||||
trans[i] = createInstance(list[direction==FORWARD ? i : (count-1-i)],
|
||||
direction);
|
||||
}
|
||||
delete[] list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the IDs of the given list of transliterators, concatenated
|
||||
* with ';' delimiting them. Equivalent to the perlish expression
|
||||
* join(';', map($_.getID(), transliterators).
|
||||
*/
|
||||
UnicodeString CompoundTransliterator::joinIDs(Transliterator* const transliterators[],
|
||||
int32_t count) {
|
||||
UnicodeString id;
|
||||
for (int32_t i=0; i<count; ++i) {
|
||||
if (i > 0) {
|
||||
id.append((UChar)';');
|
||||
}
|
||||
id.append(transliterators[i]->getID());
|
||||
}
|
||||
return id; // Return temporary
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a string, as in JavaScript
|
||||
*/
|
||||
UnicodeString* CompoundTransliterator::split(const UnicodeString& s,
|
||||
UChar divider,
|
||||
int32_t& count) {
|
||||
// changed MED
|
||||
// see how many there are
|
||||
count = 1;
|
||||
for (int32_t i = 0; i < s.length(); ++i) {
|
||||
if (s.charAt(i) == divider) ++count;
|
||||
}
|
||||
|
||||
// make an array with them
|
||||
UnicodeString* result = new UnicodeString[count];
|
||||
int32_t last = 0;
|
||||
int32_t current = 0;
|
||||
int32_t i;
|
||||
for (i = 0; i < s.length(); ++i) {
|
||||
if (s.charAt(i) == divider) {
|
||||
s.extractBetween(last, i, result[current++]);
|
||||
last = i+1;
|
||||
}
|
||||
}
|
||||
s.extractBetween(last, i, result[current]);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "unicode/unitohex.h"
|
||||
#include "unicode/nultrans.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/cpdtrans.h"
|
||||
|
||||
/**
|
||||
* Dictionary of known transliterators. Keys are <code>String</code>
|
||||
@ -522,15 +523,7 @@ void Transliterator::adoptFilter(UnicodeFilter* filterToAdopt) {
|
||||
* @see #registerInstance
|
||||
*/
|
||||
Transliterator* Transliterator::createInverse(void) const {
    // Delegate to createInstance() in the REVERSE direction. That
    // function performs the "A-B" -> "B-A" ID swap for simple IDs and
    // also handles compound IDs of the form "ID;ID;...". It yields 0
    // for a simple ID that contains no '-'.
    return Transliterator::createInstance(ID, REVERSE);
}
|
||||
|
||||
/**
|
||||
@ -545,8 +538,24 @@ Transliterator* Transliterator::createInverse(void) const {
|
||||
* @see #getAvailableIDs
|
||||
* @see #getID
|
||||
*/
|
||||
Transliterator* Transliterator::createInstance(const UnicodeString& ID,
                                Transliterator::Direction dir) {
    // An ID containing ';' is a compound ID ("ID;ID;..."): build a
    // CompoundTransliterator from it, with no filter.
    if (ID.indexOf((UChar)';') >= 0) {
        return new CompoundTransliterator(ID, dir, 0);
    }

    // Forward direction: look the ID up as given.
    if (dir != REVERSE) {
        return _createInstance(ID);
    }

    // Reverse direction: an ID of the form "A-B" is inverted to "B-A"
    // by swapping the text around the first '-'. Without a '-' no
    // inverse ID can be formed, so there is nothing to create.
    const int32_t dash = ID.indexOf((UChar)'-');
    if (dash < 0) {
        return 0;
    }
    UnicodeString inverseID, source;
    ID.extractBetween(dash+1, ID.length(), inverseID);
    ID.extractBetween(0, dash, source);
    inverseID.append((UChar)'-').append(source);
    return _createInstance(inverseID);
}
|
||||
|
||||
|
@ -32,7 +32,7 @@
|
||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: cpdtrans.h,v $ $Revision: 1.1 $ $Date: 1999/12/28 23:54:20 $
|
||||
* @version $RCSfile: cpdtrans.h,v $ $Revision: 1.2 $ $Date: 2000/01/14 21:15:13 $
|
||||
*/
|
||||
class U_I18N_API CompoundTransliterator : public Transliterator {
|
||||
|
||||
@ -54,12 +54,12 @@ public:
|
||||
* altered by this transliterator. If <tt>filter</tt> is
|
||||
* <tt>null</tt> then no filtering is applied.
|
||||
*/
|
||||
CompoundTransliterator(const UnicodeString& ID,
|
||||
Transliterator* const transliterators[],
|
||||
CompoundTransliterator(Transliterator* const transliterators[],
|
||||
int32_t count,
|
||||
UnicodeFilter* adoptedFilter = 0);
|
||||
|
||||
CompoundTransliterator(const UnicodeString& ID,
|
||||
Direction dir = FORWARD,
|
||||
UnicodeFilter* adoptedFilter = 0);
|
||||
|
||||
/**
|
||||
@ -128,6 +128,21 @@ public:
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* Return the IDs of the given list of transliterators, concatenated
|
||||
* with ';' delimiting them. Equivalent to the perlish expression
|
||||
* join(';', map($_.getID(), transliterators)).
|
||||
*/
|
||||
UnicodeString joinIDs(Transliterator* const transliterators[],
|
||||
int32_t count);
|
||||
|
||||
/**
|
||||
* Splits a string, as in JavaScript
|
||||
*/
|
||||
UnicodeString* split(const UnicodeString& s,
|
||||
UChar divider,
|
||||
int32_t& count);
|
||||
|
||||
void freeTransliterators(void);
|
||||
};
|
||||
#endif
|
||||
|
@ -221,24 +221,6 @@ class U_I18N_API RuleBasedTransliterator : public Transliterator {
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Direction constant passed to constructor to specify whether forward
|
||||
* or reverse rules are parsed. The other rules are ignored.
|
||||
*/
|
||||
enum Direction {
|
||||
/**
|
||||
* Direction constant passed to constructor to create a transliterator
|
||||
* using the forward rules.
|
||||
*/
|
||||
FORWARD,
|
||||
|
||||
/**
|
||||
* Direction constant passed to constructor to create a transliterator
|
||||
* using the reverse rules.
|
||||
*/
|
||||
REVERSE
|
||||
};
|
||||
|
||||
/**
|
||||
* Constructs a new transliterator from the given rules.
|
||||
* @param rules rules, separated by ';'
|
||||
|
@ -222,6 +222,17 @@ class U_I18N_API Transliterator {
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Direction constant indicating the direction in a transliterator, e.g.,
|
||||
* the forward or reverse rules of a RuleBasedTransliterator. An "A-B"
|
||||
* transliterator transliterates A to B when operating in the forward
|
||||
* direction, and B to A when operating in the reverse direction.
|
||||
*/
|
||||
enum Direction {
|
||||
/** Operate in the forward direction: an "A-B" transliterator maps A to B. */
FORWARD,
|
||||
/** Operate in the reverse direction: an "A-B" transliterator maps B to A. */
REVERSE
|
||||
};
|
||||
|
||||
enum {
|
||||
/**
|
||||
* In the <code>keyboardTransliterate()</code>
|
||||
@ -719,7 +730,7 @@ public:
|
||||
* transliterator is registered.
|
||||
* @see #registerInstance
|
||||
*/
|
||||
virtual Transliterator* createInverse(void) const;
|
||||
Transliterator* createInverse(void) const;
|
||||
|
||||
/**
|
||||
* Returns a <code>Transliterator</code> object given its ID.
|
||||
@ -733,7 +744,8 @@ public:
|
||||
* @see #getAvailableIDs
|
||||
* @see #getID
|
||||
*/
|
||||
static Transliterator* createInstance(const UnicodeString& ID);
|
||||
static Transliterator* createInstance(const UnicodeString& ID,
|
||||
Direction dir = FORWARD);
|
||||
|
||||
private:
|
||||
|
||||
|
@ -371,21 +371,13 @@ void TransliteratorTest::TestArabic(void) {
|
||||
* some strings that should come out unchanged.
|
||||
*/
|
||||
void TransliteratorTest::TestCompoundKana(void) {
|
||||
Transliterator* kana = Transliterator::createInstance("Latin-Kana");
|
||||
Transliterator* rkana = Transliterator::createInstance("Kana-Latin");
|
||||
Transliterator* trans[] = { kana, rkana };
|
||||
if (kana == 0 || rkana == 0) {
|
||||
errln("FAIL: construction failed");
|
||||
delete kana;
|
||||
delete rkana;
|
||||
return;
|
||||
Transliterator* t = Transliterator::createInstance("Latin-Kana;Kana-Latin");
|
||||
if (t == 0) {
|
||||
errln("FAIL: construction of Latin-Kana;Kana-Latin failed");
|
||||
} else {
|
||||
expect(*t, "aaaaa", "aaaaa");
|
||||
delete t;
|
||||
}
|
||||
Transliterator* t = new CompoundTransliterator("<ID>", trans, 2);
|
||||
|
||||
expect(*t, "aaaaa", "aaaaa");
|
||||
delete t;
|
||||
delete kana;
|
||||
delete rkana;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -406,13 +398,13 @@ void TransliteratorTest::TestCompoundHex(void) {
|
||||
// Do some basic tests of b
|
||||
expect(*b, "\\u0030\\u0031", "01");
|
||||
|
||||
Transliterator* ab = new CompoundTransliterator("ab", transab, 2);
|
||||
Transliterator* ab = new CompoundTransliterator(transab, 2);
|
||||
UnicodeString s("abcde");
|
||||
expect(*ab, s, s);
|
||||
|
||||
UnicodeString str(s);
|
||||
a->transliterate(str);
|
||||
Transliterator* ba = new CompoundTransliterator("ba", transba, 2);
|
||||
Transliterator* ba = new CompoundTransliterator(transba, 2);
|
||||
expect(*ba, str, str);
|
||||
|
||||
delete ab;
|
||||
|
Loading…
Reference in New Issue
Block a user