ICU-1079 initial implementation of Source-Target/Variant; no RB lookup yet

X-SVN-Rev: 5412
This commit is contained in:
Alan Liu 2001-08-01 17:37:08 +00:00
parent 0594f1d90d
commit caa1bdd17e
13 changed files with 333 additions and 45 deletions

View File

@ -106,7 +106,7 @@ zh.txt zh__PINYIN.txt zh_CN.txt zh_HK.txt zh_SG.txt zh_TW.txt zh_TW_STROKE.txt
TRANSLIT_SOURCE=fullhalf.txt translit_index.txt kana.txt kbdescl1.txt\
larabic.txt lcyril.txt ldevan.txt\
lgreek.txt lhebrew.txt ljamo.txt\
lkana.txt quotes.txt ucname.txt\
lkana.txt quotes.txt\
Bengali_InterIndic.txt\
Devanagari_InterIndic.txt\
Gujarati_InterIndic.txt\

View File

@ -97,7 +97,9 @@ translit_index {
// Java only: { "Kanji-English", "file", "-", "FORWARD" },
// Java only: { "Kanji-OnRomaji", "file", "-", "FORWARD" },
{ "KeyboardEscape-Latin1", "file", "kbdescl1", "FORWARD" },
{ "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
// Replaced by algorithmic transliterator:
// { "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
// Compound rules

View File

@ -106,7 +106,7 @@ zh.txt zh__PINYIN.txt zh_CN.txt zh_HK.txt zh_SG.txt zh_TW.txt zh_TW_STROKE.txt
TRANSLIT_SOURCE=fullhalf.txt translit_index.txt kana.txt kbdescl1.txt\
larabic.txt lcyril.txt ldevan.txt\
lgreek.txt lhebrew.txt ljamo.txt\
lkana.txt quotes.txt ucname.txt\
lkana.txt quotes.txt\
Bengali_InterIndic.txt\
Devanagari_InterIndic.txt\
Gujarati_InterIndic.txt\

View File

@ -97,7 +97,9 @@ translit_index {
// Java only: { "Kanji-English", "file", "-", "FORWARD" },
// Java only: { "Kanji-OnRomaji", "file", "-", "FORWARD" },
{ "KeyboardEscape-Latin1", "file", "kbdescl1", "FORWARD" },
{ "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
// Replaced by algorithmic transliterator:
// { "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
// Compound rules

View File

@ -15,7 +15,7 @@
/**
* ID for this transliterator.
*/
const char* HexToUnicodeTransliterator::_ID = "Hex-Unicode";
const char* HexToUnicodeTransliterator::_ID = "Hex-Any";
/**
* This pattern encodes the following specs for the default constructor:

View File

@ -39,12 +39,17 @@
// keep in sync with CompoundTransliterator
static const UChar ID_SEP = 0x002D; /*-*/
static const UChar ID_DELIM = 0x003B; /*;*/
static const UChar VARIANT_SEP = 0x002F; // '/'
static const UChar OPEN_PAREN = 40;
static const UChar CLOSE_PAREN = 41;
static Hashtable _cache(TRUE); // TRUE = keys are case insensitive
static Hashtable _internalCache(TRUE); // TRUE = keys are case insensitive
// Map of source name to (Hashtable mapping target name to (UVector of
// variant names)).
static Hashtable sourceMap(TRUE);
/**
* Cache of public system transliterators. Keys are UnicodeString
* names, values are CacheEntry objects.
@ -1350,7 +1355,7 @@ void Transliterator::_registerFactory(const UnicodeString& id,
CacheEntry* entry = (CacheEntry*) cache->get(id);
if (entry == 0) {
cacheIDs.addElement((void*) new UnicodeString(id));
_registerID(id);
entry = new CacheEntry();
}
entry->setFactory(factory);
@ -1396,7 +1401,7 @@ void Transliterator::_registerInstance(Transliterator* adoptedPrototype,
CacheEntry* entry = (CacheEntry*) cache->get(id);
if (entry == 0) {
cacheIDs.addElement((void*) new UnicodeString(id));
_registerID(id);
entry = new CacheEntry();
}
@ -1464,6 +1469,114 @@ const UnicodeString& Transliterator::getAvailableID(int32_t index) {
return *(const UnicodeString*) cacheIDs[index];
}
/**
 * Return the number of distinct source specifiers currently recorded
 * in sourceMap.
 */
int32_t Transliterator::countAvailableSources(void) {
    // Populate the registry on first use.  This is done before locking
    // because initializeCache() takes cacheMutex itself.
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);
    const int32_t sourceCount = sourceMap.count();
    return sourceCount;
}
/**
 * Copy the index-th registered source specifier into result.
 *
 * Sources are enumerated in the iteration order of sourceMap.  If
 * index is negative or not less than the number of sources, result
 * is set to the empty string.
 *
 * @param index  zero-based position of the source to fetch
 * @param result fill-in parameter that receives the source name
 * @return reference to result
 */
UnicodeString& Transliterator::getAvailableSource(int32_t index,
                                                  UnicodeString& result) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    // Advance the hash iterator index+1 times; the element returned by
    // the final step is the one requested.  A null element means the
    // table was exhausted first.
    int32_t cursor = -1;
    const UHashElement *entry = 0;
    for (int32_t remaining = index; remaining >= 0; --remaining) {
        entry = sourceMap.nextElement(cursor);
        if (entry == 0) {
            break;
        }
    }

    if (entry != 0) {
        result = *(UnicodeString*) entry->key.pointer;
    } else {
        result.truncate(0);
    }
    return result;
}
/**
 * Return the number of target specifiers recorded for the given
 * source, or 0 if the source is not registered.
 *
 * @param source the source specifier to look up
 */
int32_t Transliterator::countAvailableTargets(const UnicodeString& source) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    Hashtable *targetTable = (Hashtable*) sourceMap.get(source);
    if (targetTable == 0) {
        return 0; // unknown source
    }
    return targetTable->count();
}
/**
 * Copy the index-th target specifier registered for source into
 * result.
 *
 * Targets are enumerated in the iteration order of the per-source
 * table.  If source is not registered, or index is out of range,
 * result is set to the empty string.
 *
 * @param index  zero-based position of the target to fetch
 * @param source the source specifier whose targets are enumerated
 * @param result fill-in parameter that receives the target name
 * @return reference to result
 */
UnicodeString& Transliterator::getAvailableTarget(int32_t index,
                                                  const UnicodeString& source,
                                                  UnicodeString& result) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    const UHashElement *entry = 0;
    Hashtable *targetTable = (Hashtable*) sourceMap.get(source);
    if (targetTable != 0) {
        // Step the iterator index+1 times; stop early if the table
        // runs out of elements.
        int32_t cursor = -1;
        for (int32_t remaining = index; remaining >= 0; --remaining) {
            entry = targetTable->nextElement(cursor);
            if (entry == 0) {
                break;
            }
        }
    }

    if (entry == 0) {
        // invalid source or invalid index
        result.truncate(0);
        return result;
    }
    result = *(UnicodeString*) entry->key.pointer;
    return result;
}
/**
 * Return the number of variant specifiers recorded for the given
 * source-target pair, or 0 if either the source or the target is
 * not registered.
 *
 * @param source the source specifier
 * @param target the target specifier
 */
int32_t Transliterator::countAvailableVariants(const UnicodeString& source,
                                               const UnicodeString& target) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    Hashtable *targetTable = (Hashtable*) sourceMap.get(source);
    UVector *variantList =
        (targetTable == 0) ? 0 : (UVector*) targetTable->get(target);
    if (variantList == 0) {
        return 0; // unknown source or unknown target
    }
    return variantList->size();
}
/**
 * Copy the index-th variant specifier registered for the given
 * source-target pair into result.
 *
 * If the source or target is not registered, or if no element is
 * stored at index, result is set to the empty string.
 *
 * @param index  zero-based position of the variant to fetch
 * @param source the source specifier
 * @param target the target specifier
 * @param result fill-in parameter that receives the variant name
 * @return reference to result
 */
UnicodeString& Transliterator::getAvailableVariant(int32_t index,
                                                   const UnicodeString& source,
                                                   const UnicodeString& target,
                                                   UnicodeString& result) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    // Each lookup is attempted only if the previous level was found;
    // a null at any level falls through to the empty result.
    Hashtable *targetTable = (Hashtable*) sourceMap.get(source);
    UVector *variantList =
        (targetTable == 0) ? 0 : (UVector*) targetTable->get(target);
    UnicodeString *name =
        (variantList == 0) ? 0 : (UnicodeString*) variantList->elementAt(index);

    if (name != 0) {
        result = *name;
    } else {
        // invalid source, invalid target, or invalid index
        result.truncate(0);
    }
    return result;
}
/**
* Method for subclasses to use to obtain a character in the given
* string, with filtering.
@ -1477,6 +1590,68 @@ UChar Transliterator::filteredCharAt(const Replaceable& text, int32_t i) const {
(localFilter->contains(c = text.charAt(i)) ? c : (UChar)0xFFFE);
}
/**
 * Register an ID (with no whitespace in it, no inline filter, and
 * not compound) in the Source-Target/Variant record.
 *
 * The ID is appended to cacheIDs and then split into its parts:
 *   - everything after the first VARIANT_SEP ('/') is the variant;
 *   - within the part before the variant, everything before the first
 *     ID_SEP ('-') is the source and the rest is the target;
 *   - if that part contains no ID_SEP, the source defaults to "Any".
 *
 * An ID_SEP that occurs after the VARIANT_SEP belongs to the variant
 * and is not treated as the source-target separator.
 *
 * @param id the ID to register; the caller must hold cacheMutex
 */
void Transliterator::_registerID(const UnicodeString& id) {
    // cacheMutex must already be held (by caller)
    cacheIDs.addElement((void*) new UnicodeString(id));

    UnicodeString source, target, variant;
    int32_t dash = id.indexOf(ID_SEP);
    int32_t stroke = id.indexOf(VARIANT_SEP);
    int32_t start = 0;
    int32_t limit = id.length();

    if (stroke >= 0) {
        id.extractBetween(stroke + 1, id.length(), variant);
        limit = stroke;
        if (dash > stroke) {
            // The first '-' lies inside the variant, so the
            // source-target part has no separator of its own.
            // Without this, source would swallow the '/' and the
            // target range [dash+1, stroke) would be inverted.
            dash = -1;
        }
    }

    if (dash < 0) {
        source = UnicodeString("Any", "");
    } else {
        id.extractBetween(0, dash, source);
        start = dash + 1;
    }

    id.extractBetween(start, limit, target);
    _registerSTV(source, target, variant);
}
/**
* Register a source-target/variant in the Source-Target/Variant record.
* Variant may be empty, but source and target must not be.
*/
void Transliterator::_registerSTV(const UnicodeString& source,
const UnicodeString& target,
const UnicodeString& variant) {
// cacheMutex must already be held (by caller)
// assert(source.length() > 0);
// assert(target.length() > 0);
UErrorCode status = U_ZERO_ERROR;
Hashtable *targets = (Hashtable*) sourceMap.get(source);
if (targets == 0) {
targets = new Hashtable(TRUE);
if (targets == 0) {
return;
}
targets->setValueDeleter(uhash_deleteUVector);
sourceMap.put(source, targets, status);
}
UVector *variants = (UVector*) targets->get(target);
if (variants == 0) {
variants = new UVector(uhash_deleteUnicodeString,
uhash_compareCaselessUnicodeString);
if (variants == 0) {
return;
}
targets->put(target, variants, status);
}
if (variant.length() > 0 &&
!variants->contains((void*) &variant)) {
variants->addElement(new UnicodeString(variant));
}
}
void Transliterator::initializeCache(void) {
// Lock first, check init boolean second
Mutex lock(&cacheMutex);
@ -1492,6 +1667,8 @@ void Transliterator::initializeCache(void) {
cacheIDs.setDeleter(uhash_deleteUnicodeString);
cacheIDs.setComparer(uhash_compareCaselessUnicodeString);
sourceMap.setValueDeleter(uhash_deleteHashtable);
/* The following code parses the index table located in
* icu/data/translit_index.txt. The index is an n x 4 table
* that follows this format:
@ -1555,9 +1732,8 @@ void Transliterator::initializeCache(void) {
Hashtable* c = isInternal ? internalCache : cache;
c->put(id, entry, status);
// cacheIDs owns & should delete the following string
if (!isInternal) {
cacheIDs.addElement((void*) new UnicodeString(id));
_registerID(id);
}
}
}

View File

@ -858,6 +858,12 @@ private:
friend class NormalizationTransliterator;
/**
 * Register an ID (with no whitespace in it, no inline filter, and
 * not compound) in the Source-Target/Variant record.  The caller
 * must already hold cacheMutex.
 * @param id the ID to register
 */
static void _registerID(const UnicodeString& id);
/**
 * Register a source-target/variant in the Source-Target/Variant
 * record.  The caller must already hold cacheMutex.
 * @param source the source specifier; must not be empty
 * @param target the target specifier; must not be empty
 * @param variant the variant specifier; may be empty
 */
static void _registerSTV(const UnicodeString& source,
const UnicodeString& target,
const UnicodeString& variant);
public:
/**
@ -918,6 +924,66 @@ public:
*/
static const UnicodeString& getAvailableID(int32_t index);
/**
 * Return the number of registered source specifiers.
 * @return the number n of source specifiers; indices 0 to n-1 may be
 * passed to getAvailableSource()
 */
static int32_t countAvailableSources(void);
/**
 * Return a registered source specifier.
 * @param index which specifier to return, from 0 to n-1, where
 * n = countAvailableSources()
 * @param result fill-in parameter to receive the source specifier.
 * If index is out of range, result will be empty.
 * @return reference to result
 */
static UnicodeString& getAvailableSource(int32_t index,
UnicodeString& result);
/**
 * Return the number of registered target specifiers for a given
 * source specifier.
 * @param source the source specifier
 * @return the number of targets registered for source, or 0 if
 * source is not a registered source specifier
 */
static int32_t countAvailableTargets(const UnicodeString& source);
/**
 * Return a registered target specifier for a given source.
 * @param index which specifier to return, from 0 to n-1, where
 * n = countAvailableTargets(source)
 * @param source the source specifier
 * @param result fill-in parameter to receive the target specifier.
 * If source is invalid or if index is out of range, result will
 * be empty.
 * @return reference to result
 */
static UnicodeString& getAvailableTarget(int32_t index,
const UnicodeString& source,
UnicodeString& result);
/**
 * Return the number of registered variant specifiers for a given
 * source-target pair.
 * @param source the source specifier
 * @param target the target specifier
 * @return the number of variants registered for the pair, or 0 if
 * source or target is not registered
 */
static int32_t countAvailableVariants(const UnicodeString& source,
const UnicodeString& target);
/**
 * Return a registered variant specifier for a given source-target
 * pair.
 * @param index which specifier to return, from 0 to n-1, where
 * n = countAvailableVariants(source, target)
 * @param source the source specifier
 * @param target the target specifier
 * @param result fill-in parameter to receive the variant
 * specifier. If source is invalid or if target is invalid or if
 * index is out of range, result will be empty.
 * @return reference to result
 */
static UnicodeString& getAvailableVariant(int32_t index,
const UnicodeString& source,
const UnicodeString& target,
UnicodeString& result);
/**
* Return the class ID for this class. This is useful only for
* comparing to a return value from getDynamicClassID(). For example:

View File

@ -14,7 +14,7 @@
/**
* ID for this transliterator.
*/
const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";
const char* UnicodeToHexTransliterator::_ID = "Any-Hex";
const UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
// Use Unicode hex values for EBCDIC compatibility

View File

@ -188,7 +188,7 @@ void CompoundTransliteratorTest::TestGetCount(){
logln("Testing the getCount() API of CompoundTransliterator");
UErrorCode status = U_ZERO_ERROR;
CompoundTransliterator *ct1=new CompoundTransliterator("Halfwidth-Fullwidth;Fullwidth-Halfwidth", status);
CompoundTransliterator *ct2=new CompoundTransliterator("Unicode-Hex;Hex-Unicode;Cyrillic-Latin;Latin-Cyrillic", status);
CompoundTransliterator *ct2=new CompoundTransliterator("Any-Hex;Hex-Any;Cyrillic-Latin;Latin-Cyrillic", status);
CompoundTransliterator *ct3=(CompoundTransliterator*)ct1;
CompoundTransliterator *ct4=new CompoundTransliterator("Latin-Devanagari", status);
CompoundTransliterator *ct5=new CompoundTransliterator(*ct4);
@ -211,7 +211,7 @@ void CompoundTransliteratorTest::TestGetCount(){
void CompoundTransliteratorTest::TestGetSetAdoptTransliterator(){
logln("Testing the getTransliterator() API of CompoundTransliterator");
UnicodeString ID("Latin-Greek;Greek-Latin;Latin-Devanagari;Devanagari-Latin;Latin-Cyrillic;Cyrillic-Latin;Unicode-Hex;Hex-Unicode");
UnicodeString ID("Latin-Greek;Greek-Latin;Latin-Devanagari;Devanagari-Latin;Latin-Cyrillic;Cyrillic-Latin;Any-Hex;Hex-Any");
UErrorCode status = U_ZERO_ERROR;
CompoundTransliterator *ct1=new CompoundTransliterator(ID, status);
if(U_FAILURE(status)){
@ -233,7 +233,7 @@ void CompoundTransliteratorTest::TestGetSetAdoptTransliterator(){
logln("Testing setTransliterator() API of CompoundTransliterator");
UnicodeString ID2("Hex-Unicode;Unicode-Hex;Latin-Cyrillic;Cyrillic-Latin;Halfwidth-Fullwidth;Fullwidth-Halfwidth");
UnicodeString ID2("Hex-Any;Any-Hex;Latin-Cyrillic;Cyrillic-Latin;Halfwidth-Fullwidth;Fullwidth-Halfwidth");
array=split(ID2, 0x003b, count);
Transliterator** transarray=new Transliterator*[count];
for(i=0;i<count;i++){
@ -306,7 +306,7 @@ UnicodeString* CompoundTransliteratorTest::split(const UnicodeString& str, UChar
void CompoundTransliteratorTest::TestTransliterate(){
logln("Testing the handleTransliterate() API of CompoundTransliterator");
UErrorCode status = U_ZERO_ERROR;
CompoundTransliterator *ct1=new CompoundTransliterator("Unicode-Hex;Hex-Unicode", status);
CompoundTransliterator *ct1=new CompoundTransliterator("Any-Hex;Hex-Any", status);
if(U_FAILURE(status)){
errln("CompoundTransliterator construction failed");
}else {
@ -327,8 +327,8 @@ void CompoundTransliteratorTest::TestTransliterate(){
delete ct1;
UnicodeString Data[]={
//ID, input string, transliterated string
"Unicode-Hex;Hex-Unicode;Unicode-Hex", "hello", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""),
"Unicode-Hex;Hex-Unicode", "hello! How are you?", "hello! How are you?",
"Any-Hex;Hex-Any;Any-Hex", "hello", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""),
"Any-Hex;Hex-Any", "hello! How are you?", "hello! How are you?",
"Devanagari-Latin;Latin-Devanagari", CharsToUnicodeString("\\u092D\\u0948'\\u0930'\\u0935"), CharsToUnicodeString("\\u092D\\u0948\\u0930\\u0935"), // quotes lost
"Latin-Cyrillic;Cyrillic-Latin", "a'b'k'd'e'f'g'h'i'j'Shch'shch'zh'h", "abkdefghijShchshchzhh",
"Latin-Greek;Greek-Latin", "ABGabgAKLMN", "ABGabgAKLMN",

View File

@ -150,8 +150,8 @@ void TransliteratorAPITest::TestgetInverse() {
"Latin-Cyrillic",
"Devanagari-Latin",
"Latin-Devanagari",
"Unicode-Hex",
"Hex-Unicode"
"Any-Hex",
"Hex-Any"
};
for(uint32_t i=0; i<sizeof(TransID)/sizeof(TransID[0]); i=i+2){
t1=Transliterator::createInstance(TransID[i]);
@ -195,13 +195,13 @@ void TransliteratorAPITest::TestGetDisplayName() {
UnicodeString dispNames[]= {
//ID, displayName
"CurlyQuotes-StraightQuotes" ,"CurlyQuotes to StraightQuotes",
"Unicode-Hex" ,"Unicode to Hex Escape",
"Any-Hex" ,"Any to Hex Escape",
"Halfwidth-Fullwidth" ,"Halfwidth to Fullwidth" ,
"Latin-Arabic" ,"Latin to Arabic" ,
"Latin-Devanagari" ,"Latin to Devanagari" ,
"Greek-Latin" ,"Greek to Latin" ,
"Arabic-Latin" ,"Arabic to Latin" ,
"Hex-Unicode" ,"Hex Escape to Unicode",
"Hex-Any" ,"Hex Escape to Any",
"Cyrillic-Latin" ,"Cyrillic to Latin" ,
"Latin-Greek" ,"Latin to Greek" ,
"Latin-Kana" ,"Latin to Kana" ,
@ -239,8 +239,8 @@ void TransliteratorAPITest::TestTransliterate1(){
UnicodeString Data[]={
//ID, input string, transliterated string
"Unicode-Hex", "hello", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", "") ,
"Hex-Unicode", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""), "hello" ,
"Any-Hex", "hello", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", "") ,
"Hex-Any", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""), "hello" ,
"Latin-Devanagari", "bhaarata", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924") ,
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "bhaarata" ,
// "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042") ,
@ -283,9 +283,9 @@ void TransliteratorAPITest::TestTransliterate2(){
//testing tranliterate(String text, int start, int limit, StringBuffer result)
UnicodeString Data2[]={
//ID, input string, start, limit, transliterated string
"Unicode-Hex", "hello! How are you?", "0", "5", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""), UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F! How are you?", "") ,
"Unicode-Hex", "hello! How are you?", "7", "12", UnicodeString("\\u0048\\u006F\\u0077\\u0020\\u0061", ""), UnicodeString("hello! \\u0048\\u006F\\u0077\\u0020\\u0061re you?", ""),
"Hex-Unicode", CharsToUnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F\\u0021\\u0020"), "0", "5", "hello", "hello! " ,
"Any-Hex", "hello! How are you?", "0", "5", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""), UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F! How are you?", "") ,
"Any-Hex", "hello! How are you?", "7", "12", UnicodeString("\\u0048\\u006F\\u0077\\u0020\\u0061", ""), UnicodeString("hello! \\u0048\\u006F\\u0077\\u0020\\u0061re you?", ""),
"Hex-Any", CharsToUnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F\\u0021\\u0020"), "0", "5", "hello", "hello! " ,
// "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), "1", "2", CharsToUnicodeString("\\u0041\\u0301"), CharsToUnicodeString("\\u00C0\\u0041\\u0301\\u0042") ,
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "0", "1", "bha", CharsToUnicodeString("bha\\u093E\\u0930\\u0924") ,
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "1", "2", "aa", CharsToUnicodeString("\\u092Daa\\u0930\\u0924")
@ -316,7 +316,7 @@ void TransliteratorAPITest::TestTransliterate2(){
logln("\n Try calling transliterate with illegal start and limit values");
t=Transliterator::createInstance("Unicode-Hex");
t=Transliterator::createInstance("Any-Hex");
gotResBuf = temp = "try start greater than limit";
t->transliterate(gotResBuf, 10, 5);
if(gotResBuf == temp)
@ -337,7 +337,7 @@ void TransliteratorAPITest::TestTransliterate3(){
};
int start, limit;
UnicodeString message;
Transliterator *t=Transliterator::createInstance("Unicode-Hex");
Transliterator *t=Transliterator::createInstance("Any-Hex");
if(t == 0)
errln("FAIL : construction");
for(uint32_t i=0; i<sizeof(Data)/sizeof(Data[0]); i=i+3){
@ -352,7 +352,7 @@ void TransliteratorAPITest::TestTransliterate3(){
void TransliteratorAPITest::TestSimpleKeyboardTransliterator(){
logln("simple call to transliterate");
UErrorCode status=U_ZERO_ERROR;
Transliterator* t=Transliterator::createInstance("Unicode-Hex");
Transliterator* t=Transliterator::createInstance("Any-Hex");
if(t == 0)
errln("FAIL : construction");
UTransPosition index={19,20,20,20};
@ -400,7 +400,7 @@ void TransliteratorAPITest::TestKeyboardTransliterator1(){
"", UnicodeString("\\u0061\\u0062\\u007A", "")
};
Transliterator* t=Transliterator::createInstance("Unicode-Hex");
Transliterator* t=Transliterator::createInstance("Any-Hex");
//keyboardAux(t, Data);
UTransPosition index={0, 0, 0, 0};
UErrorCode status=U_ZERO_ERROR;
@ -452,7 +452,7 @@ void TransliteratorAPITest::TestKeyboardTransliterator1(){
void TransliteratorAPITest::TestKeyboardTransliterator2(){
UnicodeString Data[]={
//insertion, buffer, index[START], index[LIMIT], index[CURSOR]
//data for Unicode-Hex
//data for Any-Hex
"abc", UnicodeString("Initial String: add-\\u0061\\u0062\\u0063-", ""), "19", "20", "20",
"a", UnicodeString("In\\u0069\\u0061tial String: add-\\u0061\\u0062\\u0063-", ""), "2", "3", "2" ,
"b", UnicodeString("\\u0062In\\u0069\\u0061tial String: add-\\u0061\\u0062\\u0063-", ""), "0", "0", "0" ,
@ -473,7 +473,7 @@ void TransliteratorAPITest::TestKeyboardTransliterator2(){
logln("Testing transliterate(Replaceable, int32_t, UnicodeString, UErrorCode)");
rs="Initial String: add--";
t=Transliterator::createInstance("Unicode-Hex");
t=Transliterator::createInstance("Any-Hex");
if(t == 0)
errln("FAIL : construction");
else {
@ -508,7 +508,7 @@ void TransliteratorAPITest::TestKeyboardTransliterator3(){
UErrorCode status=U_ZERO_ERROR;
UTransPosition index={0, 0, 0, 0};
logln("Testing transliterate(Replaceable, int32_t, UErrorCode)");
Transliterator *t=Transliterator::createInstance("Unicode-Hex");
Transliterator *t=Transliterator::createInstance("Any-Hex");
if(t == 0)
errln("FAIL : construction");
for(uint32_t i=0; i<sizeof(Data)/sizeof(Data[0]); i=i+4){
@ -650,7 +650,7 @@ class TestFilter3 : public UnicodeFilter {
void TransliteratorAPITest::TestGetAdoptFilter(){
Transliterator *t=Transliterator::createInstance("Unicode-Hex");
Transliterator *t=Transliterator::createInstance("Any-Hex");
if(t == 0)
errln("FAIL : construction");
const UnicodeFilter *u=t->getFilter();

View File

@ -69,6 +69,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE(33,TestContext);
TESTCASE(34,TestSupplemental);
TESTCASE(35,TestQuantifier);
TESTCASE(36,TestSTV);
default: name = ""; break;
}
}
@ -444,8 +445,8 @@ void TransliteratorTest::TestCompoundKana(void) {
* Compose the hex transliterators forward and reverse.
*/
void TransliteratorTest::TestCompoundHex(void) {
Transliterator* a = Transliterator::createInstance("Unicode-Hex");
Transliterator* b = Transliterator::createInstance("Hex-Unicode");
Transliterator* a = Transliterator::createInstance("Any-Hex");
Transliterator* b = Transliterator::createInstance("Hex-Any");
Transliterator* transab[] = { a, b };
Transliterator* transba[] = { b, a };
if (a == 0 || b == 0) {
@ -490,9 +491,9 @@ class TestFilter : public UnicodeFilter {
* Do some basic tests of filtering.
*/
void TransliteratorTest::TestFiltering(void) {
Transliterator* hex = Transliterator::createInstance("Unicode-Hex");
Transliterator* hex = Transliterator::createInstance("Any-Hex");
if (hex == 0) {
errln("FAIL: createInstance(Unicode-Hex) failed");
errln("FAIL: createInstance(Any-Hex) failed");
return;
}
hex->adoptFilter(new TestFilter());
@ -644,7 +645,7 @@ void TransliteratorTest::TestJ243(void) {
UErrorCode status = U_ZERO_ERROR;
#if !defined(HPUX)
// Test default Hex-Unicode, which should handle
// Test default Hex-Any, which should handle
// \u, \U, u+, and U+
HexToUnicodeTransliterator hex;
expect(hex, UnicodeString("\\u0041+\\U0042,u+0043uu+0044z", ""), "A+B,CuDz");
@ -654,7 +655,7 @@ void TransliteratorTest::TestJ243(void) {
HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), status);
expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
"abcd5fx012&#x00033;");
// Try custom Unicode-Hex (default is tested elsewhere)
// Try custom Any-Hex (default is tested elsewhere)
status = U_ZERO_ERROR;
UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), status);
expect(hex3, "012", "&#x30;&#x31;&#x32;");
@ -952,13 +953,13 @@ void TransliteratorTest::TestFilterIDs(void) {
// Array of 3n strings:
// <id>, <inverse id>, <input>, <expected output>
const char* DATA[] = {
"Unicode[aeiou]-Hex",
"Hex[aeiou]-Unicode",
"Any[aeiou]-Hex",
"Hex[aeiou]-Any",
"quizzical",
"q\\u0075\\u0069zz\\u0069c\\u0061l",
"Unicode[aeiou]-Hex;Hex[^5]-Unicode",
"Unicode[^5]-Hex;Hex[aeiou]-Unicode",
"Any[aeiou]-Hex;Hex[^5]-Any",
"Any[^5]-Hex;Hex[aeiou]-Any",
"quizzical",
"q\\u0075izzical",
@ -1218,7 +1219,7 @@ void TransliteratorTest::TestCompoundRBT(void) {
// Careful with spacing and ';' here: Phrase this exactly
// as toRules() is going to return it. If toRules() changes
// with regard to spacing or ';', then adjust this string.
UnicodeString rule("::Hex-Unicode;\n"
UnicodeString rule("::Hex-Any;\n"
"::Any-Lower;\n"
"a > '.A.';\n"
"b > '.B.';\n"
@ -1553,6 +1554,42 @@ void TransliteratorTest::TestQuantifier() {
"bb x xb");
}
/**
 * Test Source-Target/Variant.
 *
 * Enumerates every registered source, every target of each source,
 * and every variant of each source-target pair, logging each name.
 * An empty name returned for an in-range index is reported as a
 * failure.
 */
void TransliteratorTest::TestSTV(void) {
    int32_t ns = Transliterator::countAvailableSources();
    for (int32_t i=0; i<ns; ++i) {
        UnicodeString source;
        Transliterator::getAvailableSource(i, source);
        logln((UnicodeString)"" + i + ": " + source);
        if (source.length() == 0) {
            errln("FAIL: empty source");
            continue;
        }
        int32_t nt = Transliterator::countAvailableTargets(source);
        for (int32_t j=0; j<nt; ++j) {
            UnicodeString target;
            Transliterator::getAvailableTarget(j, source, target);
            logln((UnicodeString)" " + j + ": " + target);
            if (target.length() == 0) {
                errln("FAIL: empty target");
                continue;
            }
            int32_t nv = Transliterator::countAvailableVariants(source, target);
            // Bound the loop by the variant index k.  Testing j here
            // would either skip the variants entirely or run k past
            // the end and report a spurious empty variant.
            for (int32_t k=0; k<nv; ++k) {
                UnicodeString variant;
                Transliterator::getAvailableVariant(k, source, target, variant);
                logln((UnicodeString)" " + k + ": " + variant);
                if (variant.length() == 0) {
                    errln("FAIL: empty variant");
                    break;
                }
            }
        }
    }
}
//======================================================================
// Support methods
//======================================================================

View File

@ -174,6 +174,11 @@ class TransliteratorTest : public IntlTest {
void TestQuantifier(void);
/**
* Test Source-Target/Variant.
*/
void TestSTV(void);
//======================================================================
// Support methods
//======================================================================

View File

@ -61,7 +61,7 @@ class Filter2: public UnicodeFilter{
void UnicodeFilterLogicTest::TestAll(){
Transliterator *t1=Transliterator::createInstance("Unicode-Hex");
Transliterator *t1=Transliterator::createInstance("Any-Hex");
if(t1 == 0){
errln("FAIL: Error in instantiation.");
return;