ICU-1079 initial implementation of Source-Target/Variant; no RB lookup yet
X-SVN-Rev: 5412
This commit is contained in:
parent
0594f1d90d
commit
caa1bdd17e
@ -106,7 +106,7 @@ zh.txt zh__PINYIN.txt zh_CN.txt zh_HK.txt zh_SG.txt zh_TW.txt zh_TW_STROKE.txt
|
||||
TRANSLIT_SOURCE=fullhalf.txt translit_index.txt kana.txt kbdescl1.txt\
|
||||
larabic.txt lcyril.txt ldevan.txt\
|
||||
lgreek.txt lhebrew.txt ljamo.txt\
|
||||
lkana.txt quotes.txt ucname.txt\
|
||||
lkana.txt quotes.txt\
|
||||
Bengali_InterIndic.txt\
|
||||
Devanagari_InterIndic.txt\
|
||||
Gujarati_InterIndic.txt\
|
||||
|
@ -97,7 +97,9 @@ translit_index {
|
||||
// Java only: { "Kanji-English", "file", "-", "FORWARD" },
|
||||
// Java only: { "Kanji-OnRomaji", "file", "-", "FORWARD" },
|
||||
{ "KeyboardEscape-Latin1", "file", "kbdescl1", "FORWARD" },
|
||||
{ "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
|
||||
|
||||
// Replaced by algorithmic transliterator:
|
||||
// { "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
|
||||
|
||||
// Compound rules
|
||||
|
||||
|
@ -106,7 +106,7 @@ zh.txt zh__PINYIN.txt zh_CN.txt zh_HK.txt zh_SG.txt zh_TW.txt zh_TW_STROKE.txt
|
||||
TRANSLIT_SOURCE=fullhalf.txt translit_index.txt kana.txt kbdescl1.txt\
|
||||
larabic.txt lcyril.txt ldevan.txt\
|
||||
lgreek.txt lhebrew.txt ljamo.txt\
|
||||
lkana.txt quotes.txt ucname.txt\
|
||||
lkana.txt quotes.txt\
|
||||
Bengali_InterIndic.txt\
|
||||
Devanagari_InterIndic.txt\
|
||||
Gujarati_InterIndic.txt\
|
||||
|
@ -97,7 +97,9 @@ translit_index {
|
||||
// Java only: { "Kanji-English", "file", "-", "FORWARD" },
|
||||
// Java only: { "Kanji-OnRomaji", "file", "-", "FORWARD" },
|
||||
{ "KeyboardEscape-Latin1", "file", "kbdescl1", "FORWARD" },
|
||||
{ "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
|
||||
|
||||
// Replaced by algorithmic transliterator:
|
||||
// { "UnicodeName-UnicodeChar", "file", "ucname", "FORWARD" },
|
||||
|
||||
// Compound rules
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
/**
|
||||
* ID for this transliterator.
|
||||
*/
|
||||
const char* HexToUnicodeTransliterator::_ID = "Hex-Unicode";
|
||||
const char* HexToUnicodeTransliterator::_ID = "Hex-Any";
|
||||
|
||||
/**
|
||||
* This pattern encodes the following specs for the default constructor:
|
||||
|
@ -39,12 +39,17 @@
|
||||
// keep in sync with CompoundTransliterator
|
||||
static const UChar ID_SEP = 0x002D; /*-*/
|
||||
static const UChar ID_DELIM = 0x003B; /*;*/
|
||||
static const UChar VARIANT_SEP = 0x002F; // '/'
|
||||
static const UChar OPEN_PAREN = 40;
|
||||
static const UChar CLOSE_PAREN = 41;
|
||||
|
||||
static Hashtable _cache(TRUE); // TRUE = keys are case insensitive
|
||||
static Hashtable _internalCache(TRUE); // TRUE = keys are case insensitive
|
||||
|
||||
// Map of source name to (Hashtable mapping target to (UVector of
|
||||
// variant names)).
|
||||
static Hashtable sourceMap(TRUE);
|
||||
|
||||
/**
|
||||
* Cache of public system transliterators. Keys are UnicodeString
|
||||
* names, values are CacheEntry objects.
|
||||
@ -1350,7 +1355,7 @@ void Transliterator::_registerFactory(const UnicodeString& id,
|
||||
|
||||
CacheEntry* entry = (CacheEntry*) cache->get(id);
|
||||
if (entry == 0) {
|
||||
cacheIDs.addElement((void*) new UnicodeString(id));
|
||||
_registerID(id);
|
||||
entry = new CacheEntry();
|
||||
}
|
||||
entry->setFactory(factory);
|
||||
@ -1396,7 +1401,7 @@ void Transliterator::_registerInstance(Transliterator* adoptedPrototype,
|
||||
|
||||
CacheEntry* entry = (CacheEntry*) cache->get(id);
|
||||
if (entry == 0) {
|
||||
cacheIDs.addElement((void*) new UnicodeString(id));
|
||||
_registerID(id);
|
||||
entry = new CacheEntry();
|
||||
}
|
||||
|
||||
@ -1464,6 +1469,114 @@ const UnicodeString& Transliterator::getAvailableID(int32_t index) {
|
||||
return *(const UnicodeString*) cacheIDs[index];
|
||||
}
|
||||
|
||||
/**
 * Return the number of source specifiers recorded in sourceMap.
 * The cache is initialized on first use.
 */
int32_t Transliterator::countAvailableSources(void) {
    // Populate the registry lazily before reading from it.
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);
    const int32_t sourceCount = sourceMap.count();
    return sourceCount;
}
|
||||
|
||||
/**
 * Copy the index-th key of sourceMap (in the table's own iteration
 * order) into result.  If index is negative or runs past the last
 * element, result is emptied instead.
 * @return reference to result
 */
UnicodeString& Transliterator::getAvailableSource(int32_t index,
                                                  UnicodeString& result) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    // Advance the iterator index+1 times; the element reached on the
    // final step is the one requested.  A null element means we ran
    // off the end of the table.
    int32_t iterPos = -1;
    const UHashElement *elem = 0;
    for (int32_t remaining = index; remaining >= 0; --remaining) {
        elem = sourceMap.nextElement(iterPos);
        if (elem == 0) {
            break;
        }
    }

    if (elem != 0) {
        result = *(UnicodeString*) elem->key.pointer;
    } else {
        result.truncate(0);
    }
    return result;
}
|
||||
|
||||
/**
 * Return the number of targets recorded under the given source, or
 * zero if the source has no entry in sourceMap.
 */
int32_t Transliterator::countAvailableTargets(const UnicodeString& source) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    Hashtable *targetTable = (Hashtable*) sourceMap.get(source);
    if (targetTable == 0) {
        // Unknown source
        return 0;
    }
    return targetTable->count();
}
|
||||
|
||||
/**
 * Copy the index-th target key registered under source (in the
 * table's own iteration order) into result.  If source is unknown,
 * or index is out of range, result is emptied instead.
 * @return reference to result
 */
UnicodeString& Transliterator::getAvailableTarget(int32_t index,
                                                  const UnicodeString& source,
                                                  UnicodeString& result) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    // Resolve the per-source table before result is modified.
    Hashtable *targetTable = (Hashtable*) sourceMap.get(source);
    const UHashElement *elem = 0;

    if (targetTable != 0) {
        // Advance index+1 times; stop early if the table is exhausted.
        int32_t iterPos = -1;
        for (int32_t remaining = index; remaining >= 0; --remaining) {
            elem = targetTable->nextElement(iterPos);
            if (elem == 0) {
                break;
            }
        }
    }

    if (elem != 0) {
        result = *(UnicodeString*) elem->key.pointer;
    } else {
        result.truncate(0); // invalid source or invalid index
    }
    return result;
}
|
||||
|
||||
/**
 * Return the number of variants recorded for the given source-target
 * pair, or zero if either the source or the target is not registered.
 */
int32_t Transliterator::countAvailableVariants(const UnicodeString& source,
                                               const UnicodeString& target) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    int32_t variantCount = 0;
    Hashtable *targetTable = (Hashtable*) sourceMap.get(source);
    if (targetTable != 0) {
        UVector *variantList = (UVector*) targetTable->get(target);
        if (variantList != 0) {
            variantCount = variantList->size();
        }
    }
    return variantCount;
}
|
||||
|
||||
/**
 * Copy the index-th variant registered for the given source-target
 * pair into result.  If the source or target is unknown, or if the
 * vector yields a null element for index, result is emptied instead.
 * @return reference to result
 */
UnicodeString& Transliterator::getAvailableVariant(int32_t index,
                                                   const UnicodeString& source,
                                                   const UnicodeString& target,
                                                   UnicodeString& result) {
    if (!cacheInitialized) {
        initializeCache();
    }

    Mutex lock(&cacheMutex);

    // Walk source -> target -> variant; 'found' stays null if any
    // step of the lookup fails.  All lookups are completed before
    // result is written.
    UnicodeString *found = 0;
    Hashtable *targetTable = (Hashtable*) sourceMap.get(source);
    if (targetTable != 0) {
        UVector *variantList = (UVector*) targetTable->get(target);
        if (variantList != 0) {
            found = (UnicodeString*) variantList->elementAt(index);
        }
    }

    if (found != 0) {
        result = *found;
    } else {
        result.truncate(0); // invalid source, target, or index
    }
    return result;
}
|
||||
|
||||
/**
|
||||
* Method for subclasses to use to obtain a character in the given
|
||||
* string, with filtering.
|
||||
@ -1477,6 +1590,68 @@ UChar Transliterator::filteredCharAt(const Replaceable& text, int32_t i) const {
|
||||
(localFilter->contains(c = text.charAt(i)) ? c : (UChar)0xFFFE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Register an ID (with no whitespace in it, no inline filter, and
|
||||
* not compound) in the Source-Target/Variant record.
|
||||
*/
|
||||
void Transliterator::_registerID(const UnicodeString& id) {
|
||||
// cacheMutex must already be held (by caller)
|
||||
cacheIDs.addElement((void*) new UnicodeString(id));
|
||||
|
||||
UnicodeString source, target, variant;
|
||||
int32_t dash = id.indexOf(ID_SEP);
|
||||
int32_t stroke = id.indexOf(VARIANT_SEP);
|
||||
int32_t start = 0;
|
||||
int32_t limit = id.length();
|
||||
if (dash < 0) {
|
||||
source = UnicodeString("Any", "");
|
||||
} else {
|
||||
id.extractBetween(0, dash, source);
|
||||
start = dash + 1;
|
||||
}
|
||||
if (stroke >= 0) {
|
||||
id.extractBetween(stroke + 1, id.length(), variant);
|
||||
limit = stroke;
|
||||
}
|
||||
id.extractBetween(start, limit, target);
|
||||
_registerSTV(source, target, variant);
|
||||
}
|
||||
|
||||
/**
|
||||
* Register a source-target/variant in the Source-Target/Variant record.
|
||||
* Variant may be empty, but source and target must not be.
|
||||
*/
|
||||
void Transliterator::_registerSTV(const UnicodeString& source,
|
||||
const UnicodeString& target,
|
||||
const UnicodeString& variant) {
|
||||
// cacheMutex must already be held (by caller)
|
||||
// assert(source.length() > 0);
|
||||
// assert(target.length() > 0);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
Hashtable *targets = (Hashtable*) sourceMap.get(source);
|
||||
if (targets == 0) {
|
||||
targets = new Hashtable(TRUE);
|
||||
if (targets == 0) {
|
||||
return;
|
||||
}
|
||||
targets->setValueDeleter(uhash_deleteUVector);
|
||||
sourceMap.put(source, targets, status);
|
||||
}
|
||||
UVector *variants = (UVector*) targets->get(target);
|
||||
if (variants == 0) {
|
||||
variants = new UVector(uhash_deleteUnicodeString,
|
||||
uhash_compareCaselessUnicodeString);
|
||||
if (variants == 0) {
|
||||
return;
|
||||
}
|
||||
targets->put(target, variants, status);
|
||||
}
|
||||
if (variant.length() > 0 &&
|
||||
!variants->contains((void*) &variant)) {
|
||||
variants->addElement(new UnicodeString(variant));
|
||||
}
|
||||
}
|
||||
|
||||
void Transliterator::initializeCache(void) {
|
||||
// Lock first, check init boolean second
|
||||
Mutex lock(&cacheMutex);
|
||||
@ -1492,6 +1667,8 @@ void Transliterator::initializeCache(void) {
|
||||
cacheIDs.setDeleter(uhash_deleteUnicodeString);
|
||||
cacheIDs.setComparer(uhash_compareCaselessUnicodeString);
|
||||
|
||||
sourceMap.setValueDeleter(uhash_deleteHashtable);
|
||||
|
||||
/* The following code parses the index table located in
|
||||
* icu/data/translit_index.txt. The index is an n x 4 table
|
||||
* that follows this format:
|
||||
@ -1555,9 +1732,8 @@ void Transliterator::initializeCache(void) {
|
||||
Hashtable* c = isInternal ? internalCache : cache;
|
||||
c->put(id, entry, status);
|
||||
|
||||
// cacheIDs owns & should delete the following string
|
||||
if (!isInternal) {
|
||||
cacheIDs.addElement((void*) new UnicodeString(id));
|
||||
_registerID(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -858,6 +858,12 @@ private:
|
||||
|
||||
friend class NormalizationTransliterator;
|
||||
|
||||
static void _registerID(const UnicodeString& id);
|
||||
|
||||
static void _registerSTV(const UnicodeString& source,
|
||||
const UnicodeString& target,
|
||||
const UnicodeString& variant);
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
@ -918,6 +924,66 @@ public:
|
||||
*/
|
||||
static const UnicodeString& getAvailableID(int32_t index);
|
||||
|
||||
/**
|
||||
* Return the number of registered source specifiers.
|
||||
*/
|
||||
static int32_t countAvailableSources(void);
|
||||
|
||||
/**
|
||||
* Return a registered source specifier.
|
||||
* @param index which specifier to return, from 0 to n-1, where
|
||||
* n = countAvailableSources()
|
||||
* @param result fill-in parameter to receive the source specifier.
|
||||
* If index is out of range, result will be empty.
|
||||
* @return reference to result
|
||||
*/
|
||||
static UnicodeString& getAvailableSource(int32_t index,
|
||||
UnicodeString& result);
|
||||
|
||||
/**
|
||||
* Return the number of registered target specifiers for a given
|
||||
* source specifier.
|
||||
*/
|
||||
static int32_t countAvailableTargets(const UnicodeString& source);
|
||||
|
||||
/**
|
||||
* Return a registered target specifier for a given source.
|
||||
* @param index which specifier to return, from 0 to n-1, where
|
||||
* n = countAvailableTargets(source)
|
||||
* @param source the source specifier
|
||||
* @param result fill-in parameter to receive the target specifier.
|
||||
* If source is invalid or if index is out of range, result will
|
||||
* be empty.
|
||||
* @return reference to result
|
||||
*/
|
||||
static UnicodeString& getAvailableTarget(int32_t index,
|
||||
const UnicodeString& source,
|
||||
UnicodeString& result);
|
||||
|
||||
/**
|
||||
* Return the number of registered variant specifiers for a given
|
||||
* source-target pair.
|
||||
*/
|
||||
static int32_t countAvailableVariants(const UnicodeString& source,
|
||||
const UnicodeString& target);
|
||||
|
||||
/**
|
||||
* Return a registered variant specifier for a given source-target
|
||||
* pair.
|
||||
* @param index which specifier to return, from 0 to n-1, where
|
||||
* n = countAvailableVariants(source, target)
|
||||
* @param source the source specifier
|
||||
* @param target the target specifier
|
||||
* @param result fill-in parameter to receive the variant
|
||||
* specifier. If source is invalid or if target is invalid or if
|
||||
* index is out of range, result will be empty.
|
||||
* @return reference to result
|
||||
*/
|
||||
static UnicodeString& getAvailableVariant(int32_t index,
|
||||
const UnicodeString& source,
|
||||
const UnicodeString& target,
|
||||
UnicodeString& result);
|
||||
|
||||
/**
|
||||
* Return the class ID for this class. This is useful only for
|
||||
* comparing to a return value from getDynamicClassID(). For example:
|
||||
|
@ -14,7 +14,7 @@
|
||||
/**
|
||||
* ID for this transliterator.
|
||||
*/
|
||||
const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";
|
||||
const char* UnicodeToHexTransliterator::_ID = "Any-Hex";
|
||||
|
||||
const UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
|
||||
// Use Unicode hex values for EBCDIC compatibility
|
||||
|
@ -188,7 +188,7 @@ void CompoundTransliteratorTest::TestGetCount(){
|
||||
logln("Testing the getCount() API of CompoundTransliterator");
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
CompoundTransliterator *ct1=new CompoundTransliterator("Halfwidth-Fullwidth;Fullwidth-Halfwidth", status);
|
||||
CompoundTransliterator *ct2=new CompoundTransliterator("Unicode-Hex;Hex-Unicode;Cyrillic-Latin;Latin-Cyrillic", status);
|
||||
CompoundTransliterator *ct2=new CompoundTransliterator("Any-Hex;Hex-Any;Cyrillic-Latin;Latin-Cyrillic", status);
|
||||
CompoundTransliterator *ct3=(CompoundTransliterator*)ct1;
|
||||
CompoundTransliterator *ct4=new CompoundTransliterator("Latin-Devanagari", status);
|
||||
CompoundTransliterator *ct5=new CompoundTransliterator(*ct4);
|
||||
@ -211,7 +211,7 @@ void CompoundTransliteratorTest::TestGetCount(){
|
||||
|
||||
void CompoundTransliteratorTest::TestGetSetAdoptTransliterator(){
|
||||
logln("Testing the getTransliterator() API of CompoundTransliterator");
|
||||
UnicodeString ID("Latin-Greek;Greek-Latin;Latin-Devanagari;Devanagari-Latin;Latin-Cyrillic;Cyrillic-Latin;Unicode-Hex;Hex-Unicode");
|
||||
UnicodeString ID("Latin-Greek;Greek-Latin;Latin-Devanagari;Devanagari-Latin;Latin-Cyrillic;Cyrillic-Latin;Any-Hex;Hex-Any");
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
CompoundTransliterator *ct1=new CompoundTransliterator(ID, status);
|
||||
if(U_FAILURE(status)){
|
||||
@ -233,7 +233,7 @@ void CompoundTransliteratorTest::TestGetSetAdoptTransliterator(){
|
||||
|
||||
|
||||
logln("Testing setTransliterator() API of CompoundTransliterator");
|
||||
UnicodeString ID2("Hex-Unicode;Unicode-Hex;Latin-Cyrillic;Cyrillic-Latin;Halfwidth-Fullwidth;Fullwidth-Halfwidth");
|
||||
UnicodeString ID2("Hex-Any;Any-Hex;Latin-Cyrillic;Cyrillic-Latin;Halfwidth-Fullwidth;Fullwidth-Halfwidth");
|
||||
array=split(ID2, 0x003b, count);
|
||||
Transliterator** transarray=new Transliterator*[count];
|
||||
for(i=0;i<count;i++){
|
||||
@ -306,7 +306,7 @@ UnicodeString* CompoundTransliteratorTest::split(const UnicodeString& str, UChar
|
||||
void CompoundTransliteratorTest::TestTransliterate(){
|
||||
logln("Testing the handleTransliterate() API of CompoundTransliterator");
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
CompoundTransliterator *ct1=new CompoundTransliterator("Unicode-Hex;Hex-Unicode", status);
|
||||
CompoundTransliterator *ct1=new CompoundTransliterator("Any-Hex;Hex-Any", status);
|
||||
if(U_FAILURE(status)){
|
||||
errln("CompoundTransliterator construction failed");
|
||||
}else {
|
||||
@ -327,8 +327,8 @@ void CompoundTransliteratorTest::TestTransliterate(){
|
||||
delete ct1;
|
||||
UnicodeString Data[]={
|
||||
//ID, input string, transliterated string
|
||||
"Unicode-Hex;Hex-Unicode;Unicode-Hex", "hello", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""),
|
||||
"Unicode-Hex;Hex-Unicode", "hello! How are you?", "hello! How are you?",
|
||||
"Any-Hex;Hex-Any;Any-Hex", "hello", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""),
|
||||
"Any-Hex;Hex-Any", "hello! How are you?", "hello! How are you?",
|
||||
"Devanagari-Latin;Latin-Devanagari", CharsToUnicodeString("\\u092D\\u0948'\\u0930'\\u0935"), CharsToUnicodeString("\\u092D\\u0948\\u0930\\u0935"), // quotes lost
|
||||
"Latin-Cyrillic;Cyrillic-Latin", "a'b'k'd'e'f'g'h'i'j'Shch'shch'zh'h", "abkdefghijShchshchzhh",
|
||||
"Latin-Greek;Greek-Latin", "ABGabgAKLMN", "ABGabgAKLMN",
|
||||
|
@ -150,8 +150,8 @@ void TransliteratorAPITest::TestgetInverse() {
|
||||
"Latin-Cyrillic",
|
||||
"Devanagari-Latin",
|
||||
"Latin-Devanagari",
|
||||
"Unicode-Hex",
|
||||
"Hex-Unicode"
|
||||
"Any-Hex",
|
||||
"Hex-Any"
|
||||
};
|
||||
for(uint32_t i=0; i<sizeof(TransID)/sizeof(TransID[0]); i=i+2){
|
||||
t1=Transliterator::createInstance(TransID[i]);
|
||||
@ -195,13 +195,13 @@ void TransliteratorAPITest::TestGetDisplayName() {
|
||||
UnicodeString dispNames[]= {
|
||||
//ID, displayName
|
||||
"CurlyQuotes-StraightQuotes" ,"CurlyQuotes to StraightQuotes",
|
||||
"Unicode-Hex" ,"Unicode to Hex Escape",
|
||||
"Any-Hex" ,"Any to Hex Escape",
|
||||
"Halfwidth-Fullwidth" ,"Halfwidth to Fullwidth" ,
|
||||
"Latin-Arabic" ,"Latin to Arabic" ,
|
||||
"Latin-Devanagari" ,"Latin to Devanagari" ,
|
||||
"Greek-Latin" ,"Greek to Latin" ,
|
||||
"Arabic-Latin" ,"Arabic to Latin" ,
|
||||
"Hex-Unicode" ,"Hex Escape to Unicode",
|
||||
"Hex-Any" ,"Hex Escape to Any",
|
||||
"Cyrillic-Latin" ,"Cyrillic to Latin" ,
|
||||
"Latin-Greek" ,"Latin to Greek" ,
|
||||
"Latin-Kana" ,"Latin to Kana" ,
|
||||
@ -239,8 +239,8 @@ void TransliteratorAPITest::TestTransliterate1(){
|
||||
|
||||
UnicodeString Data[]={
|
||||
//ID, input string, transliterated string
|
||||
"Unicode-Hex", "hello", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", "") ,
|
||||
"Hex-Unicode", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""), "hello" ,
|
||||
"Any-Hex", "hello", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", "") ,
|
||||
"Hex-Any", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""), "hello" ,
|
||||
"Latin-Devanagari", "bhaarata", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924") ,
|
||||
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "bhaarata" ,
|
||||
// "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042") ,
|
||||
@ -283,9 +283,9 @@ void TransliteratorAPITest::TestTransliterate2(){
|
||||
//testing transliterate(String text, int start, int limit, StringBuffer result)
|
||||
UnicodeString Data2[]={
|
||||
//ID, input string, start, limit, transliterated string
|
||||
"Unicode-Hex", "hello! How are you?", "0", "5", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""), UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F! How are you?", "") ,
|
||||
"Unicode-Hex", "hello! How are you?", "7", "12", UnicodeString("\\u0048\\u006F\\u0077\\u0020\\u0061", ""), UnicodeString("hello! \\u0048\\u006F\\u0077\\u0020\\u0061re you?", ""),
|
||||
"Hex-Unicode", CharsToUnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F\\u0021\\u0020"), "0", "5", "hello", "hello! " ,
|
||||
"Any-Hex", "hello! How are you?", "0", "5", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""), UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F! How are you?", "") ,
|
||||
"Any-Hex", "hello! How are you?", "7", "12", UnicodeString("\\u0048\\u006F\\u0077\\u0020\\u0061", ""), UnicodeString("hello! \\u0048\\u006F\\u0077\\u0020\\u0061re you?", ""),
|
||||
"Hex-Any", CharsToUnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F\\u0021\\u0020"), "0", "5", "hello", "hello! " ,
|
||||
// "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), "1", "2", CharsToUnicodeString("\\u0041\\u0301"), CharsToUnicodeString("\\u00C0\\u0041\\u0301\\u0042") ,
|
||||
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "0", "1", "bha", CharsToUnicodeString("bha\\u093E\\u0930\\u0924") ,
|
||||
"Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), "1", "2", "aa", CharsToUnicodeString("\\u092Daa\\u0930\\u0924")
|
||||
@ -316,7 +316,7 @@ void TransliteratorAPITest::TestTransliterate2(){
|
||||
|
||||
|
||||
logln("\n Try calling transliterate with illegal start and limit values");
|
||||
t=Transliterator::createInstance("Unicode-Hex");
|
||||
t=Transliterator::createInstance("Any-Hex");
|
||||
gotResBuf = temp = "try start greater than limit";
|
||||
t->transliterate(gotResBuf, 10, 5);
|
||||
if(gotResBuf == temp)
|
||||
@ -337,7 +337,7 @@ void TransliteratorAPITest::TestTransliterate3(){
|
||||
};
|
||||
int start, limit;
|
||||
UnicodeString message;
|
||||
Transliterator *t=Transliterator::createInstance("Unicode-Hex");
|
||||
Transliterator *t=Transliterator::createInstance("Any-Hex");
|
||||
if(t == 0)
|
||||
errln("FAIL : construction");
|
||||
for(uint32_t i=0; i<sizeof(Data)/sizeof(Data[0]); i=i+3){
|
||||
@ -352,7 +352,7 @@ void TransliteratorAPITest::TestTransliterate3(){
|
||||
void TransliteratorAPITest::TestSimpleKeyboardTransliterator(){
|
||||
logln("simple call to transliterate");
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
Transliterator* t=Transliterator::createInstance("Unicode-Hex");
|
||||
Transliterator* t=Transliterator::createInstance("Any-Hex");
|
||||
if(t == 0)
|
||||
errln("FAIL : construction");
|
||||
UTransPosition index={19,20,20,20};
|
||||
@ -400,7 +400,7 @@ void TransliteratorAPITest::TestKeyboardTransliterator1(){
|
||||
"", UnicodeString("\\u0061\\u0062\\u007A", "")
|
||||
|
||||
};
|
||||
Transliterator* t=Transliterator::createInstance("Unicode-Hex");
|
||||
Transliterator* t=Transliterator::createInstance("Any-Hex");
|
||||
//keyboardAux(t, Data);
|
||||
UTransPosition index={0, 0, 0, 0};
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
@ -452,7 +452,7 @@ void TransliteratorAPITest::TestKeyboardTransliterator1(){
|
||||
void TransliteratorAPITest::TestKeyboardTransliterator2(){
|
||||
UnicodeString Data[]={
|
||||
//insertion, buffer, index[START], index[LIMIT], index[CURSOR]
|
||||
//data for Unicode-Hex
|
||||
//data for Any-Hex
|
||||
"abc", UnicodeString("Initial String: add-\\u0061\\u0062\\u0063-", ""), "19", "20", "20",
|
||||
"a", UnicodeString("In\\u0069\\u0061tial String: add-\\u0061\\u0062\\u0063-", ""), "2", "3", "2" ,
|
||||
"b", UnicodeString("\\u0062In\\u0069\\u0061tial String: add-\\u0061\\u0062\\u0063-", ""), "0", "0", "0" ,
|
||||
@ -473,7 +473,7 @@ void TransliteratorAPITest::TestKeyboardTransliterator2(){
|
||||
logln("Testing transliterate(Replaceable, int32_t, UnicodeString, UErrorCode)");
|
||||
|
||||
rs="Initial String: add--";
|
||||
t=Transliterator::createInstance("Unicode-Hex");
|
||||
t=Transliterator::createInstance("Any-Hex");
|
||||
if(t == 0)
|
||||
errln("FAIL : construction");
|
||||
else {
|
||||
@ -508,7 +508,7 @@ void TransliteratorAPITest::TestKeyboardTransliterator3(){
|
||||
UErrorCode status=U_ZERO_ERROR;
|
||||
UTransPosition index={0, 0, 0, 0};
|
||||
logln("Testing transliterate(Replaceable, int32_t, UErrorCode)");
|
||||
Transliterator *t=Transliterator::createInstance("Unicode-Hex");
|
||||
Transliterator *t=Transliterator::createInstance("Any-Hex");
|
||||
if(t == 0)
|
||||
errln("FAIL : construction");
|
||||
for(uint32_t i=0; i<sizeof(Data)/sizeof(Data[0]); i=i+4){
|
||||
@ -650,7 +650,7 @@ class TestFilter3 : public UnicodeFilter {
|
||||
|
||||
|
||||
void TransliteratorAPITest::TestGetAdoptFilter(){
|
||||
Transliterator *t=Transliterator::createInstance("Unicode-Hex");
|
||||
Transliterator *t=Transliterator::createInstance("Any-Hex");
|
||||
if(t == 0)
|
||||
errln("FAIL : construction");
|
||||
const UnicodeFilter *u=t->getFilter();
|
||||
|
@ -69,6 +69,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
|
||||
TESTCASE(33,TestContext);
|
||||
TESTCASE(34,TestSupplemental);
|
||||
TESTCASE(35,TestQuantifier);
|
||||
TESTCASE(36,TestSTV);
|
||||
default: name = ""; break;
|
||||
}
|
||||
}
|
||||
@ -444,8 +445,8 @@ void TransliteratorTest::TestCompoundKana(void) {
|
||||
* Compose the hex transliterators forward and reverse.
|
||||
*/
|
||||
void TransliteratorTest::TestCompoundHex(void) {
|
||||
Transliterator* a = Transliterator::createInstance("Unicode-Hex");
|
||||
Transliterator* b = Transliterator::createInstance("Hex-Unicode");
|
||||
Transliterator* a = Transliterator::createInstance("Any-Hex");
|
||||
Transliterator* b = Transliterator::createInstance("Hex-Any");
|
||||
Transliterator* transab[] = { a, b };
|
||||
Transliterator* transba[] = { b, a };
|
||||
if (a == 0 || b == 0) {
|
||||
@ -490,9 +491,9 @@ class TestFilter : public UnicodeFilter {
|
||||
* Do some basic tests of filtering.
|
||||
*/
|
||||
void TransliteratorTest::TestFiltering(void) {
|
||||
Transliterator* hex = Transliterator::createInstance("Unicode-Hex");
|
||||
Transliterator* hex = Transliterator::createInstance("Any-Hex");
|
||||
if (hex == 0) {
|
||||
errln("FAIL: createInstance(Unicode-Hex) failed");
|
||||
errln("FAIL: createInstance(Any-Hex) failed");
|
||||
return;
|
||||
}
|
||||
hex->adoptFilter(new TestFilter());
|
||||
@ -644,7 +645,7 @@ void TransliteratorTest::TestJ243(void) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
#if !defined(HPUX)
|
||||
// Test default Hex-Unicode, which should handle
|
||||
// Test default Hex-Any, which should handle
|
||||
// \u, \U, u+, and U+
|
||||
HexToUnicodeTransliterator hex;
|
||||
expect(hex, UnicodeString("\\u0041+\\U0042,u+0043uu+0044z", ""), "A+B,CuDz");
|
||||
@ -654,7 +655,7 @@ void TransliteratorTest::TestJ243(void) {
|
||||
HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), status);
|
||||
expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
|
||||
"abcd5fx0123");
|
||||
// Try custom Unicode-Hex (default is tested elsewhere)
|
||||
// Try custom Any-Hex (default is tested elsewhere)
|
||||
status = U_ZERO_ERROR;
|
||||
UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), status);
|
||||
expect(hex3, "012", "012");
|
||||
@ -952,13 +953,13 @@ void TransliteratorTest::TestFilterIDs(void) {
|
||||
// Array of 4n strings:
|
||||
// <id>, <inverse id>, <input>, <expected output>
|
||||
const char* DATA[] = {
|
||||
"Unicode[aeiou]-Hex",
|
||||
"Hex[aeiou]-Unicode",
|
||||
"Any[aeiou]-Hex",
|
||||
"Hex[aeiou]-Any",
|
||||
"quizzical",
|
||||
"q\\u0075\\u0069zz\\u0069c\\u0061l",
|
||||
|
||||
"Unicode[aeiou]-Hex;Hex[^5]-Unicode",
|
||||
"Unicode[^5]-Hex;Hex[aeiou]-Unicode",
|
||||
"Any[aeiou]-Hex;Hex[^5]-Any",
|
||||
"Any[^5]-Hex;Hex[aeiou]-Any",
|
||||
"quizzical",
|
||||
"q\\u0075izzical",
|
||||
|
||||
@ -1218,7 +1219,7 @@ void TransliteratorTest::TestCompoundRBT(void) {
|
||||
// Careful with spacing and ';' here: Phrase this exactly
|
||||
// as toRules() is going to return it. If toRules() changes
|
||||
// with regard to spacing or ';', then adjust this string.
|
||||
UnicodeString rule("::Hex-Unicode;\n"
|
||||
UnicodeString rule("::Hex-Any;\n"
|
||||
"::Any-Lower;\n"
|
||||
"a > '.A.';\n"
|
||||
"b > '.B.';\n"
|
||||
@ -1553,6 +1554,42 @@ void TransliteratorTest::TestQuantifier() {
|
||||
"bb x xb");
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Source-Target/Variant.
|
||||
*/
|
||||
void TransliteratorTest::TestSTV(void) {
|
||||
int32_t ns = Transliterator::countAvailableSources();
|
||||
for (int32_t i=0; i<ns; ++i) {
|
||||
UnicodeString source;
|
||||
Transliterator::getAvailableSource(i, source);
|
||||
logln((UnicodeString)"" + i + ": " + source);
|
||||
if (source.length() == 0) {
|
||||
errln("FAIL: empty source");
|
||||
continue;
|
||||
}
|
||||
int32_t nt = Transliterator::countAvailableTargets(source);
|
||||
for (int32_t j=0; j<nt; ++j) {
|
||||
UnicodeString target;
|
||||
Transliterator::getAvailableTarget(j, source, target);
|
||||
logln((UnicodeString)" " + j + ": " + target);
|
||||
if (target.length() == 0) {
|
||||
errln("FAIL: empty target");
|
||||
continue;
|
||||
}
|
||||
int32_t nv = Transliterator::countAvailableVariants(source, target);
|
||||
for (int32_t k=0; j<nv; ++k) {
|
||||
UnicodeString variant;
|
||||
Transliterator::getAvailableVariant(k, source, target, variant);
|
||||
logln((UnicodeString)" " + k + ": " + variant);
|
||||
if (variant.length() == 0) {
|
||||
errln("FAIL: empty variant");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
|
@ -174,6 +174,11 @@ class TransliteratorTest : public IntlTest {
|
||||
|
||||
void TestQuantifier(void);
|
||||
|
||||
/**
|
||||
* Test Source-Target/Variant.
|
||||
*/
|
||||
void TestSTV(void);
|
||||
|
||||
//======================================================================
|
||||
// Support methods
|
||||
//======================================================================
|
||||
|
@ -61,7 +61,7 @@ class Filter2: public UnicodeFilter{
|
||||
|
||||
void UnicodeFilterLogicTest::TestAll(){
|
||||
|
||||
Transliterator *t1=Transliterator::createInstance("Unicode-Hex");
|
||||
Transliterator *t1=Transliterator::createInstance("Any-Hex");
|
||||
if(t1 == 0){
|
||||
errln("FAIL: Error in instantiation.");
|
||||
return;
|
||||
|
Loading…
Reference in New Issue
Block a user