0d06bd5306
X-SVN-Rev: 7020
1290 lines
49 KiB
C++
1290 lines
49 KiB
C++
/*
|
|
**********************************************************************
|
|
* Copyright (C) 2001, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
**********************************************************************
|
|
* Date Name Description
|
|
* 05/23/00 aliu Creation.
|
|
**********************************************************************
|
|
*/
|
|
#include "unicode/utypes.h"
|
|
#include "unicode/translit.h"
|
|
#include "unicode/rbt.h"
|
|
#include "unicode/uniset.h"
|
|
#include "unicode/unicode.h"
|
|
#include "unicode/normlzr.h"
|
|
#include "unicode/uchar.h"
|
|
#include "transrt.h"
|
|
#include "testutil.h"
|
|
|
|
// Expands to one `case` label of runIndexedTest(): the test's name is always
// published through `name`; the test itself runs (after a short log banner)
// only when `exec` is set.  The trailing `break` deliberately has no
// semicolon -- the use site supplies it.
#define CASE(id,test)                     \
    case id:                              \
        name = #test;                     \
        if (exec) {                       \
            logln(#test "---");           \
            logln((UnicodeString)"");     \
            test();                       \
        }                                 \
        break
|
|
|
|
// Like CASE, but for tests that only take part in exhaustive runs: when
// `quick` is set the slot reports an empty name and nothing is executed;
// otherwise it publishes the name and, if `exec` is set, logs a banner and
// runs the test.  The trailing `break` has no semicolon -- the use site
// supplies it.
#define EXHAUSTIVE(id,test)               \
    case id:                              \
        if (quick) {                      \
            name = "";                    \
        } else {                          \
            name = #test;                 \
            if (exec) {                   \
                logln(#test "---");       \
                logln((UnicodeString)""); \
                test();                   \
            }                             \
        }                                 \
        break
|
|
void
|
|
TransliteratorRoundTripTest::runIndexedTest(int32_t index, UBool exec,
|
|
const char* &name, char* /*par*/) {
|
|
switch (index) {
|
|
CASE(0,TestHiragana);
|
|
CASE(1,TestKatakana);
|
|
//CASE(2,TestArabic);
|
|
//CASE(3,TestHebrew);
|
|
CASE(2,TestGreek);
|
|
CASE(3,Testel);
|
|
CASE(4,TestCyrillic);
|
|
CASE(5,TestDevanagariLatin);
|
|
|
|
#ifdef ENABLE_FAILING_TESTS
|
|
CASE(6,TestJamo);
|
|
CASE(7,TestJamoHangul);
|
|
#else
|
|
case 6: /* falls through */
|
|
case 7: /* falls through */
|
|
#endif
|
|
EXHAUSTIVE(8,TestDevanagariBengali);
|
|
EXHAUSTIVE(9,TestDevanagariGurmukhi);
|
|
EXHAUSTIVE(10,TestDevanagariGujarati);
|
|
EXHAUSTIVE(11,TestDevanagariOriya);
|
|
EXHAUSTIVE(12,TestDevanagariTamil);
|
|
EXHAUSTIVE(13,TestDevanagariTelugu);
|
|
EXHAUSTIVE(14,TestDevanagariKannada);
|
|
EXHAUSTIVE(15,TestDevanagariMalayalam);
|
|
EXHAUSTIVE(16,TestBengaliGurmukhi);
|
|
EXHAUSTIVE(17,TestBengaliGujarati);
|
|
EXHAUSTIVE(18,TestBengaliOriya);
|
|
EXHAUSTIVE(19,TestBengaliTamil);
|
|
EXHAUSTIVE(20,TestBengaliTelugu);
|
|
EXHAUSTIVE(21,TestBengaliKannada);
|
|
EXHAUSTIVE(22,TestBengaliMalayalam );
|
|
EXHAUSTIVE(23,TestGurmukhiGujarati);
|
|
EXHAUSTIVE(24,TestGurmukhiOriya);
|
|
EXHAUSTIVE(25,TestGurmukhiTamil);
|
|
EXHAUSTIVE(26,TestGurmukhiTelugu);
|
|
EXHAUSTIVE(27,TestGurmukhiKannada);
|
|
EXHAUSTIVE(28,TestGurmukhiMalayalam);
|
|
EXHAUSTIVE(29,TestGujaratiOriya);
|
|
EXHAUSTIVE(30,TestGujaratiTamil);
|
|
EXHAUSTIVE(31,TestGujaratiTelugu);
|
|
EXHAUSTIVE(32,TestGujaratiKannada);
|
|
EXHAUSTIVE(33,TestGujaratiMalayalam);
|
|
EXHAUSTIVE(34,TestOriyaTamil);
|
|
EXHAUSTIVE(35,TestOriyaTelugu);
|
|
EXHAUSTIVE(36,TestOriyaKannada);
|
|
EXHAUSTIVE(37,TestOriyaMalayalam);
|
|
EXHAUSTIVE(38,TestTamilTelugu);
|
|
EXHAUSTIVE(39,TestTamilKannada);
|
|
EXHAUSTIVE(40,TestTamilMalayalam);
|
|
EXHAUSTIVE(41,TestTeluguKannada);
|
|
EXHAUSTIVE(42,TestTeluguMalayalam);
|
|
EXHAUSTIVE(43,TestKannadaMalayalam);
|
|
default: name = ""; break;
|
|
}
|
|
}
|
|
|
|
//--------------------------------------------------------------------
|
|
// BitSet
|
|
//--------------------------------------------------------------------
|
|
|
|
/**
|
|
* Tiny and incomplete BitSet. Hardcoded to support 0..FFFF.
|
|
*/
|
|
class BitSet {
|
|
int32_t bits[65536/32];
|
|
|
|
public:
|
|
BitSet();
|
|
~BitSet();
|
|
void clear();
|
|
void set(int32_t x);
|
|
UBool get(int32_t x) const;
|
|
};
|
|
|
|
BitSet::BitSet() {
|
|
clear();
|
|
}
|
|
|
|
BitSet::~BitSet() {
|
|
}
|
|
|
|
void BitSet::clear() {
|
|
int32_t *limit = bits + 65536/32;
|
|
int32_t *p = bits;
|
|
while (p < limit) *p++ = 0;
|
|
}
|
|
|
|
void BitSet::set(int32_t x) {
|
|
x &= 0xFFFF;
|
|
int32_t i = x / 32;
|
|
int32_t bit = 1L << (x & 31);
|
|
bits[i] |= bit;
|
|
}
|
|
|
|
UBool BitSet::get(int32_t x) const {
|
|
x &= 0xFFFF;
|
|
int32_t i = x / 32;
|
|
int32_t bit = 1L << (x & 31);
|
|
return (bits[i] & bit) != 0L;
|
|
}
|
|
|
|
//--------------------------------------------------------------------
|
|
// Legal
|
|
//--------------------------------------------------------------------
|
|
|
|
class Legal {
|
|
public:
|
|
Legal() {}
|
|
virtual ~Legal() {}
|
|
virtual UBool is(const UnicodeString& /*sourceString*/) const {return TRUE;}
|
|
};
|
|
|
|
class LegalGreek : public Legal {
|
|
UBool full;
|
|
public:
|
|
LegalGreek(UBool _full) { full = _full; }
|
|
virtual ~LegalGreek() {}
|
|
|
|
virtual UBool is(const UnicodeString& sourceString) const;
|
|
|
|
static UBool isVowel(UChar c);
|
|
|
|
static UBool isRho(UChar c);
|
|
};
|
|
|
|
/**
 * Decides whether sourceString is legal Greek, working on its decomposition.
 *
 * Modern mode (full == FALSE): the exact string U+039C U+03C0 is rejected,
 * as is any string whose decomposition contains one of U+0300, U+0302,
 * U+0313, U+0314, U+0342 or U+0345.
 *
 * Full mode: breathing marks (U+0313 / U+0314) must appear IFF the first
 * letter is a vowel or RHO, and then exactly once.  If the first letter is a
 * RHO the breathing mark must come before the second letter.  Greek has no
 * surrogates, so the decomposition is scanned one UChar at a time.
 *
 * The status of the decomposition is not checked.
 */
UBool LegalGreek::is(const UnicodeString& sourceString) const {
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString nfd;
    Normalizer::decompose(sourceString, FALSE, 0, nfd, status);
    const int32_t nfdLength = nfd.length();

    if (!full) {
        if (sourceString == CharsToUnicodeString("\\u039C\\u03C0")) {
            return FALSE;
        }
        // Any of the excluded combining marks makes the string illegal.
        for (int32_t pos = 0; pos < nfdLength; ++pos) {
            switch (nfd.charAt(pos)) {
            case 0x0300:
            case 0x0302:
            case 0x0313:
            case 0x0314:
            case 0x0342:
            case 0x0345:
                return FALSE;
            default:
                break;
            }
        }
        return TRUE;
    }

    UBool startsWithVowel = FALSE;
    UBool startsWithRho = FALSE;
    int32_t letters = 0;
    int32_t breathings = 0;

    for (int32_t pos = 0; pos < nfdLength; ++pos) {
        const UChar ch = nfd.charAt(pos);
        if (u_isalpha(ch)) {
            if (++letters == 1) {
                // Classify the very first letter of the string.
                startsWithVowel = isVowel(ch);
                startsWithRho = isRho(ch);
            }
            // Leading RHO: the breathing must precede the second letter.
            if (startsWithRho && letters == 2 && breathings == 0) {
                return FALSE;
            }
        }
        if (ch == 0x0313 || ch == 0x0314) {
            ++breathings;
        }
    }

    return (startsWithVowel || startsWithRho) ? (breathings == 1)
                                              : (breathings == 0);
}
|
|
|
|
/**
 * @return TRUE if c is one of the fourteen unaccented Greek vowel letters
 *         (seven capital, seven small) listed below; FALSE otherwise.
 */
UBool LegalGreek::isVowel(UChar c) {
    static const UChar VOWELS[] = {
        0x0391, 0x0395, 0x0397, 0x0399, 0x039F, 0x03A5, 0x03A9,
        0x03B1, 0x03B5, 0x03B7, 0x03B9, 0x03BF, 0x03C5, 0x03C9
    };
    const int32_t vowelCount = (int32_t)(sizeof(VOWELS) / sizeof(VOWELS[0]));

    for (int32_t i = 0; i < vowelCount; ++i) {
        if (c == VOWELS[i]) {
            return TRUE;
        }
    }
    return FALSE;
}
|
|
|
|
/** @return TRUE if c is U+03A1 or U+03C1 (capital / small RHO); FALSE otherwise. */
UBool LegalGreek::isRho(UChar c) {
    if (c == 0x03A1 || c == 0x03C1) {
        return TRUE;
    }
    return FALSE;
}
|
|
|
|
class LegalDeleter {
|
|
Legal* obj;
|
|
Legal*& zeroMe;
|
|
public:
|
|
LegalDeleter(Legal* adopted, Legal*& ptrToClean) :
|
|
obj(adopted),
|
|
zeroMe(ptrToClean) {}
|
|
~LegalDeleter() { delete obj; zeroMe = NULL; }
|
|
};
|
|
|
|
//--------------------------------------------------------------------
|
|
// RTTest Interface
|
|
//--------------------------------------------------------------------
|
|
|
|
class RTTest {
|
|
|
|
// PrintWriter out;
|
|
|
|
UnicodeString transliteratorID;
|
|
int8_t sourceScript;
|
|
int8_t targetScript;
|
|
int32_t errorLimit;
|
|
int32_t errorCount;
|
|
int32_t pairLimit;
|
|
UnicodeSet sourceRange;
|
|
UnicodeSet targetRange;
|
|
UnicodeSet roundtripExclusions;
|
|
IntlTest* log;
|
|
Legal* legalSource; // NOT owned
|
|
UnicodeSet badCharacters;
|
|
|
|
public:
|
|
|
|
/*
|
|
* create a test for the given script transliterator.
|
|
*/
|
|
RTTest(const UnicodeString& transliteratorIDStr,
|
|
int8_t sourceScriptVal, int8_t targetScriptVal);
|
|
|
|
virtual ~RTTest();
|
|
|
|
void setErrorLimit(int32_t limit);
|
|
|
|
void setPairLimit(int32_t limit);
|
|
|
|
void test(const UnicodeString& sourceRange,
|
|
const UnicodeString& targetRange,
|
|
const char* roundtripExclusions,
|
|
IntlTest* log,
|
|
Legal* adoptedLegal);
|
|
|
|
private:
|
|
|
|
// Added to do better equality check.
|
|
|
|
static UBool isSame(const UnicodeString& a, const UnicodeString& b);
|
|
|
|
UBool includesSome(const UnicodeSet& set, const UnicodeString& a);
|
|
|
|
static UBool isCamel(const UnicodeString& a);
|
|
|
|
void test2();
|
|
|
|
void logWrongScript(const UnicodeString& label,
|
|
const UnicodeString& from,
|
|
const UnicodeString& to);
|
|
void logRoundTripFailure(const UnicodeString& from,
|
|
const UnicodeString& to,
|
|
const UnicodeString& back);
|
|
void logNotCanonical(const UnicodeString& label,
|
|
const UnicodeString& from,
|
|
const UnicodeString& to,
|
|
const UnicodeString& toCan);
|
|
|
|
protected:
|
|
|
|
/*
|
|
* Characters to filter for source-target mapping completeness
|
|
* Typically is base alphabet, minus extended characters
|
|
* Default is ASCII letters for Latin
|
|
*/
|
|
virtual UBool isSource(UChar c);
|
|
|
|
/*
|
|
* Characters to check for target back to source mapping.
|
|
* Typically the same as the target script, plus punctuation
|
|
*/
|
|
inline UBool isReceivingSource(UChar c);
|
|
|
|
/*
|
|
* Characters to filter for target-source mapping
|
|
* Typically is base alphabet, minus extended characters
|
|
*/
|
|
inline UBool isTarget(UChar c);
|
|
|
|
/*
|
|
* Characters to check for target-source mapping
|
|
* Typically the same as the source script, plus punctuation
|
|
*/
|
|
inline UBool isReceivingTarget(UChar c);
|
|
|
|
UBool isSource(const UnicodeString& s);
|
|
UBool isTarget(const UnicodeString& s);
|
|
UBool isReceivingSource(const UnicodeString& s);
|
|
UBool isReceivingTarget(const UnicodeString& s);
|
|
};
|
|
|
|
//--------------------------------------------------------------------
|
|
// RTTest Implementation
|
|
//--------------------------------------------------------------------
|
|
|
|
/*
|
|
* create a test for the given script transliterator.
|
|
*/
|
|
RTTest::RTTest(const UnicodeString& transliteratorIDStr,
|
|
int8_t sourceScriptVal, int8_t targetScriptVal) {
|
|
this->transliteratorID = transliteratorIDStr;
|
|
this->sourceScript = sourceScriptVal;
|
|
this->targetScript = targetScriptVal;
|
|
legalSource = NULL;
|
|
errorLimit = (int32_t)0x7FFFFFFFL;
|
|
errorCount = 0;
|
|
pairLimit = 0x10000;
|
|
}
|
|
|
|
// No explicit cleanup is performed here; members clean up after themselves.
RTTest::~RTTest() {}
|
|
|
|
void RTTest::setErrorLimit(int32_t limit) {
|
|
errorLimit = limit;
|
|
}
|
|
|
|
/** Stores limit as the new pair limit. */
void RTTest::setPairLimit(int32_t limit) {
    this->pairLimit = limit;
}
|
|
|
|
/**
 * Lenient equality used for round-trip comparison.  Two strings count as
 * the same when they are identical, or when they differ only by case and
 * the first one is "camel" cased (see isCamel).  The same two checks are
 * then repeated on the decomposed forms of both strings.  The status of
 * the decompositions is not checked.
 */
UBool RTTest::isSame(const UnicodeString& a, const UnicodeString& b) {
    if (a == b) {
        return TRUE;
    }
    if (a.caseCompare(b, U_FOLD_CASE_DEFAULT) == 0 && isCamel(a)) {
        return TRUE;
    }

    // Second chance: compare the decompositions.
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString aDecomposed;
    UnicodeString bDecomposed;
    Normalizer::decompose(a, FALSE, 0, aDecomposed, status);
    Normalizer::decompose(b, FALSE, 0, bDecomposed, status);

    if (aDecomposed == bDecomposed) {
        return TRUE;
    }
    return aDecomposed.caseCompare(bDecomposed, U_FOLD_CASE_DEFAULT) == 0
        && isCamel(aDecomposed);
}
|
|
|
|
/**
 * @return TRUE as soon as one code point of a is found in set; FALSE if
 *         none is (including when a is empty).
 */
UBool RTTest::includesSome(const UnicodeSet& set, const UnicodeString& a) {
    int32_t offset = 0;
    while (offset < a.length()) {
        const UChar32 codePoint = a.char32At(offset);
        if (set.contains(codePoint)) {
            return TRUE;
        }
        // Step over the whole code point, not just one UChar.
        offset += UTF_CHAR_LENGTH(codePoint);
    }
    return FALSE;
}
|
|
|
|
/**
 * Detects strings of the form aB: a lowercase letter followed, anywhere
 * later, by an uppercase or titlecase letter.  A titlecase letter also
 * counts as "lowercase seen" for what follows it, since its second letter
 * is lower.
 */
UBool RTTest::isCamel(const UnicodeString& a) {
    UBool sawLower = FALSE;
    int32_t offset = 0;

    while (offset < a.length()) {
        const UChar32 codePoint = a.char32At(offset);
        const int8_t category = u_charType(codePoint);

        const UBool isUpper = (category == U_UPPERCASE_LETTER);
        const UBool isTitle = (category == U_TITLECASE_LETTER);
        const UBool isLower = (category == U_LOWERCASE_LETTER);

        if ((isUpper || isTitle) && sawLower) {
            return TRUE;
        }
        if (isTitle || isLower) {
            sawLower = TRUE;
        }
        offset += UTF_CHAR_LENGTH(codePoint);
    }
    return FALSE;
}
|
|
|
|
void RTTest::test(const UnicodeString& sourceRangeVal,
|
|
const UnicodeString& targetRangeVal,
|
|
const char* newRoundtripExclusions,
|
|
IntlTest* logVal,
|
|
Legal* adoptedLegal) {
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
this->log = logVal;
|
|
this->legalSource = adoptedLegal;
|
|
LegalDeleter cleaner(adoptedLegal, this->legalSource);
|
|
|
|
if (sourceRangeVal.length() > 0) {
|
|
this->sourceRange.applyPattern(sourceRangeVal, status);
|
|
if (U_FAILURE(status)) {
|
|
log->errln("FAIL: UnicodeSet::applyPattern(" +
|
|
sourceRangeVal + ")");
|
|
return;
|
|
}
|
|
} else {
|
|
this->sourceRange.applyPattern("[a-zA-Z]", status);
|
|
if (U_FAILURE(status)) {
|
|
log->errln("FAIL: UnicodeSet::applyPattern([a-z])");
|
|
return;
|
|
}
|
|
}
|
|
this->targetRange.clear();
|
|
if (targetRangeVal.length() > 0) {
|
|
this->targetRange.applyPattern(targetRangeVal, status);
|
|
if (U_FAILURE(status)) {
|
|
log->errln("FAIL: UnicodeSet::applyPattern(" +
|
|
targetRangeVal + ")");
|
|
return;
|
|
}
|
|
}
|
|
this->roundtripExclusions.clear();
|
|
if (newRoundtripExclusions != NULL) {
|
|
UErrorCode ec = U_ZERO_ERROR;
|
|
this->roundtripExclusions.applyPattern(newRoundtripExclusions, ec);
|
|
}
|
|
if (badCharacters.isEmpty()) {
|
|
UErrorCode ec = U_ZERO_ERROR;
|
|
badCharacters.applyPattern("[:Other:]", ec);
|
|
}
|
|
|
|
test2();
|
|
|
|
if (errorCount > 0) {
|
|
log->errln(transliteratorID + " errors: " + errorCount); // + ", see " + logFileName);
|
|
} else {
|
|
log->logln(transliteratorID + " ok");
|
|
}
|
|
}
|
|
|
|
/**
 * Reports a mapping failure as
 *   "Fail <label>: <from>(<hex>) => <to>(<hex>)"
 * and counts it in errorCount.
 */
void RTTest::logWrongScript(const UnicodeString& label,
                            const UnicodeString& from,
                            const UnicodeString& to) {
    const UnicodeString fromText = from + "(" + TestUtility::hex(from) + ")";
    const UnicodeString toText = to + "(" + TestUtility::hex(to) + ")";

    log->errln((UnicodeString)"Fail " + label + ": " + fromText + " => " + toText);
    ++errorCount;
}
|
|
|
|
/**
 * Reports a canonical-equivalence failure as
 *   "Fail (can.equiv)<label>: <from>(<hex>) => <to>(<hex>) => <toCan> (<hex>)"
 * and counts it in errorCount.
 *
 * The hex dump that follows toCan is that of toCan itself (it used to
 * repeat the dump of `to`), and toCan is now set off from the preceding
 * text by " => ".
 */
void RTTest::logNotCanonical(const UnicodeString& label,
                             const UnicodeString& from,
                             const UnicodeString& to,
                             const UnicodeString& toCan) {
    log->errln((UnicodeString)"Fail (can.equiv)" +
               label + ": " +
               from + "(" + TestUtility::hex(from) + ") => " +
               to + "(" + TestUtility::hex(to) + ") => " +
               toCan + " (" +
               TestUtility::hex(toCan) + ")"
               );
    ++errorCount;
}
|
|
|
|
void RTTest::logRoundTripFailure(const UnicodeString& from,
|
|
const UnicodeString& to,
|
|
const UnicodeString& back) {
|
|
if (!legalSource->is(from)) return; // skip illegals
|
|
|
|
log->errln((UnicodeString)"Fail Roundtrip: " +
|
|
from + "(" + TestUtility::hex(from) + ") => " +
|
|
to + "(" + TestUtility::hex(to) + ") => " +
|
|
back + "(" + TestUtility::hex(back) + ") => ");
|
|
++errorCount;
|
|
}
|
|
|
|
/*
|
|
* Characters to filter for source-target mapping completeness
|
|
* Typically is base alphabet, minus extended characters
|
|
* Default is ASCII letters for Latin
|
|
*/
|
|
UBool RTTest::isSource(UChar c) {
|
|
return (TestUtility::getScript(c) == sourceScript && u_isalpha(c)
|
|
&& sourceRange.contains(c));
|
|
}
|
|
|
|
/*
|
|
* Characters to check for target back to source mapping.
|
|
* Typically the same as the target script, plus punctuation
|
|
*/
|
|
inline UBool
|
|
RTTest::isReceivingSource(UChar c) {
|
|
int8_t script = TestUtility::getScript(c);
|
|
return (script == sourceScript || script == TestUtility::COMMON_SCRIPT);
|
|
}
|
|
|
|
/*
|
|
* Characters to filter for target-source mapping
|
|
* Typically is base alphabet, minus extended characters
|
|
*/
|
|
inline UBool
|
|
RTTest::isTarget(UChar c) {
|
|
return (TestUtility::getScript(c) == targetScript && u_isalpha(c)
|
|
&& (targetRange.isEmpty() || targetRange.contains(c)));
|
|
}
|
|
|
|
/*
|
|
* Characters to check for target-source mapping
|
|
* Typically the same as the source script, plus punctuation
|
|
*/
|
|
inline UBool
|
|
RTTest::isReceivingTarget(UChar c) {
|
|
int8_t script = TestUtility::getScript(c);
|
|
return (script == targetScript || script == TestUtility::COMMON_SCRIPT);
|
|
}
|
|
|
|
/** @return TRUE if every UChar of s passes isSource(UChar); TRUE for an empty string. */
UBool RTTest::isSource(const UnicodeString& s) {
    const int32_t count = s.length();
    int32_t pos = 0;
    while (pos < count && isSource(s.charAt(pos))) {
        ++pos;
    }
    return pos == count;    // stopped early <=> some UChar failed
}
|
|
|
|
/** @return TRUE if every UChar of s passes isTarget(UChar); TRUE for an empty string. */
UBool RTTest::isTarget(const UnicodeString& s) {
    const int32_t count = s.length();
    int32_t pos = 0;
    while (pos < count && isTarget(s.charAt(pos))) {
        ++pos;
    }
    return pos == count;    // stopped early <=> some UChar failed
}
|
|
|
|
/** @return TRUE if every UChar of s passes isReceivingSource(UChar); TRUE for an empty string. */
UBool RTTest::isReceivingSource(const UnicodeString& s) {
    const int32_t count = s.length();
    int32_t pos = 0;
    while (pos < count && isReceivingSource(s.charAt(pos))) {
        ++pos;
    }
    return pos == count;    // stopped early <=> some UChar failed
}
|
|
|
|
/** @return TRUE if every UChar of s passes isReceivingTarget(UChar); TRUE for an empty string. */
UBool RTTest::isReceivingTarget(const UnicodeString& s) {
    const int32_t count = s.length();
    int32_t pos = 0;
    while (pos < count && isReceivingTarget(s.charAt(pos))) {
        ++pos;
    }
    return pos == count;    // stopped early <=> some UChar failed
}
|
|
|
|
//--------------------------------------------------------------------
|
|
// Specific Tests
|
|
//--------------------------------------------------------------------
|
|
|
|
void TransliteratorRoundTripTest::TestHiragana() {
|
|
RTTest test("Latin-Hiragana",
|
|
TestUtility::LATIN_SCRIPT, TestUtility::HIRAGANA_SCRIPT);
|
|
test.test("[a-z]", UnicodeString("[\\u3040-\\u3094]", ""), NULL, this, new Legal());
|
|
}
|
|
|
|
void TransliteratorRoundTripTest::TestKatakana() {
|
|
RTTest test("Latin-Katakana",
|
|
TestUtility::LATIN_SCRIPT, TestUtility::KATAKANA_SCRIPT);
|
|
test.test("[a-z]", UnicodeString("[\\u30A1-\\u30FA\\u30FC]", ""), NULL, this, new Legal());
|
|
}
|
|
|
|
// Currently a no-op: the Latin-Arabic round trip is commented out (and its
// CASE entry in runIndexedTest is commented out as well).  Note that the
// disabled call below passes four arguments, while RTTest::test() takes five.
void TransliteratorRoundTripTest::TestArabic() {
    // RTTest test("Latin-Arabic",
    //             TestUtility::LATIN_SCRIPT, TestUtility::ARABIC_SCRIPT);
    // test.test("[a-z]", UnicodeString("[\\u0620-\\u065F-[\\u0640]]", ""), this, new Legal());
}
|
|
|
|
// Currently a no-op: the Latin-Hebrew round trip is commented out (and its
// CASE entry in runIndexedTest is commented out as well).  Note that the
// disabled call below passes four arguments, while RTTest::test() takes five.
void TransliteratorRoundTripTest::TestHebrew() {
    // RTTest test("Latin-Hebrew",
    //             TestUtility::LATIN_SCRIPT, TestUtility::HEBREW_SCRIPT);
    // test.test("", UnicodeString("[\\u05D0-\\u05EF]", ""), this, new Legal());
}
|
|
|
|
void TransliteratorRoundTripTest::TestJamo() {
|
|
RTTest t("Latin-Jamo",
|
|
TestUtility::LATIN_SCRIPT, TestUtility::JAMO_SCRIPT);
|
|
t.setErrorLimit(200); // Don't run full test -- too long
|
|
t.test("", "", NULL, this, new Legal());
|
|
}
|
|
|
|
void TransliteratorRoundTripTest::TestJamoHangul() {
|
|
RTTest t("Latin-Hangul",
|
|
TestUtility::LATIN_SCRIPT, TestUtility::HANGUL_SCRIPT);
|
|
t.setErrorLimit(50); // Don't run full test -- too long
|
|
t.test("", "", NULL, this, new Legal());
|
|
}
|
|
|
|
void TransliteratorRoundTripTest::TestGreek() {
|
|
RTTest test("Latin-Greek",
|
|
TestUtility::LATIN_SCRIPT, TestUtility::GREEK_SCRIPT);
|
|
test.test("", UnicodeString("[\\u003B\\u00B7[:Greek:]-[\\u03D7-\\u03EF]]", ""),
|
|
"[\\u037A\\u03D0-\\u03F5]", /* exclusions */
|
|
this, new LegalGreek(TRUE));
|
|
}
|
|
|
|
void TransliteratorRoundTripTest::Testel() {
|
|
RTTest test("Latin-el",
|
|
TestUtility::LATIN_SCRIPT, TestUtility::GREEK_SCRIPT);
|
|
test.test("", "[\\u003B\\u00B7[:Greek:]-[\\u03D7-\\u03EF]]",
|
|
"[\\u037A\\u03D0-\\u03F5]", /* exclusions */
|
|
this, new LegalGreek(FALSE));
|
|
}
|
|
|
|
void TransliteratorRoundTripTest::TestCyrillic() {
|
|
RTTest test("Latin-Cyrillic",
|
|
TestUtility::LATIN_SCRIPT, TestUtility::CYRILLIC_SCRIPT);
|
|
test.test("", UnicodeString("[\\u0400-\\u045F]", ""), NULL, this, new Legal());
|
|
}
|
|
|
|
|
|
//----------------------------------
|
|
// Inter-Indic Tests
|
|
//----------------------------------
|
|
void TransliteratorRoundTripTest::TestDevanagariLatin() {
|
|
RTTest test("Latin-DEVANAGARI",
|
|
TestUtility::LATIN_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
|
|
test.test("", "[:Devanagari:]", NULL, this, new Legal());
|
|
}
|
|
|
|
void TransliteratorRoundTripTest::TestDevanagariBengali() {
|
|
RTTest test("BENGALI-DEVANAGARI",
|
|
TestUtility::BENGALI_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
|
|
test.test("[:BENGALI:]", "[:Devanagari:]",
|
|
"[\\u0950\\u0935\\u0912\\u0933\\u090e\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]\\u09F0\\u09F1]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("DEVANAGARI-BENGALI",
|
|
TestUtility::DEVANAGARI_SCRIPT, TestUtility::BENGALI_SCRIPT );
|
|
test1.test( "[:Devanagari:]", "[:BENGALI:]",
|
|
"[\\u0950\\u0935\\u0912\\u0933\\u090e\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]\\u09F0\\u09F1]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestDevanagariGurmukhi() {
|
|
RTTest test("GURMUKHI-DEVANAGARI",
|
|
TestUtility::GURMUKHI_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
|
|
test.test("[:GURMUKHI:]", "[:Devanagari:]",
|
|
"[\\u0950\\u090D\\u090e\\u0912\\u0911\\u090b\\u090c\\u0934\\u0960\\u0961\\u0937\\u0a72\\u0a73\\u0a74\\u093d]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("DEVANAGARI-GURMUKHI",
|
|
TestUtility::DEVANAGARI_SCRIPT, TestUtility::GURMUKHI_SCRIPT );
|
|
test1.test( "[:Devanagari:]", "[:GURMUKHI:]",
|
|
"[\\u0950\\u090D\\u090e\\u0912\\u0911\\u090b\\u090c\\u0934\\u0960\\u0961\\u0937\\u0a72\\u0a73\\u0a74\\u093d]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestDevanagariGujarati() {
|
|
RTTest test("GUJARATI-DEVANAGARI",
|
|
TestUtility::GUJARATI_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
|
|
test.test("[:GUJARATI:]", "[:Devanagari:]",
|
|
"[\\u0961\\u090c\\u090e\\u0912]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("DEVANAGARI-GUJARATI",
|
|
TestUtility::DEVANAGARI_SCRIPT, TestUtility::GUJARATI_SCRIPT );
|
|
test1.test( "[:Devanagari:]", "[:GUJARATI:]",NULL,
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestDevanagariOriya() {
|
|
RTTest test("ORIYA-DEVANAGARI",
|
|
TestUtility::ORIYA_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
|
|
test.test("[:ORIYA:]", "[:Devanagari:]",
|
|
"[\\u0950\\u090D\\u090e\\u0912\\u0911\\u0931\\u0935]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("DEVANAGARI-ORIYA",
|
|
TestUtility::DEVANAGARI_SCRIPT, TestUtility::ORIYA_SCRIPT );
|
|
test1.test( "[:Devanagari:]", "[:ORIYA:]",
|
|
"[\\u0950\\u090D\\u090e\\u0912\\u0911\\u0931\\u0935]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestDevanagariTamil() {
|
|
RTTest test("Tamil-DEVANAGARI",
|
|
TestUtility::TAMIL_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
|
|
test.test("[:tamil:]", "[:Devanagari:]",
|
|
"[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]"
|
|
"\\u090B\\u090C\\u0916\\u0917\\u0918\\u091B\\u091D\\u0920\\u0921"
|
|
"\\u0922\\u0925\\u0926\\u0927\\u092B\\u092C\\u092D\\u0936\\u0960\\u0961]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("DEVANAGARI-Tamil",
|
|
TestUtility::DEVANAGARI_SCRIPT, TestUtility::TAMIL_SCRIPT );
|
|
test1.test( "[:Devanagari:]", "[:tamil:]",
|
|
"[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestDevanagariTelugu() {
|
|
RTTest test("Telugu-DEVANAGARI",
|
|
TestUtility::TELUGU_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
|
|
test.test("[:telugu:]", "[:Devanagari:]",
|
|
"[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("DEVANAGARI-TELUGU",
|
|
TestUtility::DEVANAGARI_SCRIPT, TestUtility::TELUGU_SCRIPT );
|
|
test1.test( "[:Devanagari:]", "[:TELUGU:]",
|
|
"[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestDevanagariKannada() {
|
|
RTTest test("KANNADA-DEVANAGARI",
|
|
TestUtility::KANNADA_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
|
|
test.test("[:KANNADA:]", "[:Devanagari:]",
|
|
"[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("DEVANAGARI-KANNADA",
|
|
TestUtility::DEVANAGARI_SCRIPT, TestUtility::KANNADA_SCRIPT );
|
|
test1.test( "[:Devanagari:]", "[:KANNADA:]",
|
|
"[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestDevanagariMalayalam() {
|
|
RTTest test("MALAYALAM-DEVANAGARI",
|
|
TestUtility::MALAYALAM_SCRIPT, TestUtility::DEVANAGARI_SCRIPT);
|
|
test.test("[:MALAYALAM:]", "[:Devanagari:]",
|
|
"[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("DEVANAGARI-MALAYALAM",
|
|
TestUtility::DEVANAGARI_SCRIPT, TestUtility::MALAYALAM_SCRIPT );
|
|
test1.test( "[:Devanagari:]", "[:MALAYALAM:]",
|
|
"[\\u0950\\u090D\\u0911\\u093d\\u0929\\u0934[\\u0958-\\u095f]]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestBengaliGurmukhi() {
|
|
RTTest test("GURMUKHI-BENGALI",
|
|
TestUtility::GURMUKHI_SCRIPT, TestUtility::BENGALI_SCRIPT);
|
|
test.test("[:GURMUKHI:]", "[:BENGALI:]",
|
|
"[\\u098B\\u098C\\u09B7\\u09E0\\u09E1\\u09F0\\u09F1]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("BENGALI-GURMUKHI",
|
|
TestUtility::BENGALI_SCRIPT, TestUtility::GURMUKHI_SCRIPT);
|
|
test1.test( "[:BENGALI:]", "[:GURMUKHI:]",
|
|
"[\\u0A33\\u0A35\\u0A59\\u0A5A\\u0A5B\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestBengaliGujarati() {
|
|
RTTest test("GUJARATI-BENGALI",
|
|
TestUtility::GUJARATI_SCRIPT, TestUtility::BENGALI_SCRIPT);
|
|
test.test("[:GUJARATI:]", "[:BENGALI:]",
|
|
"[\\u098c\\u09e1\\u09f0\\u09f1]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("BENGALI-GUJARATI",
|
|
TestUtility::BENGALI_SCRIPT, TestUtility::GUJARATI_SCRIPT);
|
|
test1.test( "[:BENGALI:]", "[:GUJARATI:]",
|
|
"[\\u0A8D\\u0A91\\u0AB3\\u0AB5\\u0ABD\\u0AD0]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestBengaliOriya() {
|
|
RTTest test("ORIYA-BENGALI",
|
|
TestUtility::ORIYA_SCRIPT, TestUtility::BENGALI_SCRIPT);
|
|
test.test("[:ORIYA:]", "[:BENGALI:]",
|
|
"[\\u09f0\\u09f1]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("BENGALI-ORIYA",
|
|
TestUtility::BENGALI_SCRIPT, TestUtility::ORIYA_SCRIPT);
|
|
test1.test( "[:BENGALI:]", "[:ORIYA:]",
|
|
"[\\u0b33\\u0b3d]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestBengaliTamil() {
|
|
RTTest test("Tamil-BENGALI",
|
|
TestUtility::TAMIL_SCRIPT, TestUtility::BENGALI_SCRIPT);
|
|
test.test("[:tamil:]", "[:BENGALI:]",
|
|
"[\\u09f0\\u09f1\\u098B\\u098C\\u0996\\u0997\\u0998\\u099B\\u099D\\u09A0\\u09A1\\u09A2\\u09A5\\u09A6\\u09A7\\u09AB\\u09AC\\u09AD\\u09B6\\u09DC\\u09DD\\u09DF\\u09E0\\u09E1]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("BENGALI-Tamil",
|
|
TestUtility::BENGALI_SCRIPT, TestUtility::TAMIL_SCRIPT);
|
|
test1.test( "[:BENGALI:]", "[:tamil:]",
|
|
"[\\u0B8E\\u0B92\\u0BA9\\u0BB1\\u0BB3\\u0BB4\\u0BB5]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestBengaliTelugu() {
|
|
RTTest test("Telugu-BENGALI",
|
|
TestUtility::TELUGU_SCRIPT, TestUtility::BENGALI_SCRIPT);
|
|
test.test("[:telugu:]", "[:BENGALI:]",
|
|
"[\\u09f0\\u09f1\\u09dc\\u09dd\\u09df]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("BENGALI-TELUGU",
|
|
TestUtility::BENGALI_SCRIPT, TestUtility::TELUGU_SCRIPT);
|
|
test1.test( "[:BENGALI:]", "[:TELUGU:]",
|
|
"[\\u0C0E\\u0C12\\u0C31\\u0C33\\u0C35]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestBengaliKannada() {
|
|
RTTest test("KANNADA-BENGALI",
|
|
TestUtility::KANNADA_SCRIPT, TestUtility::BENGALI_SCRIPT);
|
|
test.test("[:KANNADA:]", "[:BENGALI:]",
|
|
"[\\u09f0\\u09f1\\u09dc\\u09dd\\u09df]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("BENGALI-KANNADA",
|
|
TestUtility::BENGALI_SCRIPT, TestUtility::KANNADA_SCRIPT);
|
|
test1.test( "[:BENGALI:]", "[:KANNADA:]",
|
|
"[\\u0C8E\\u0C92\\u0CB1\\u0cb3\\u0cb5\\u0cde]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestBengaliMalayalam() {
|
|
RTTest test("MALAYALAM-BENGALI",
|
|
TestUtility::MALAYALAM_SCRIPT, TestUtility::BENGALI_SCRIPT);
|
|
test.test("[:MALAYALAM:]", "[:BENGALI:]",
|
|
"[\\u09f0\\u09f1\\u09dc\\u09dd\\u09df]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("BENGALI-MALAYALAM",
|
|
TestUtility::BENGALI_SCRIPT, TestUtility::MALAYALAM_SCRIPT);
|
|
test1.test( "[:BENGALI:]", "[:MALAYALAM:]",
|
|
"[\\u0d31-\\u0d35\\u0d0e\\u0d12]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGurmukhiGujarati() {
|
|
RTTest test("GUJARATI-GURMUKHI",
|
|
TestUtility::GUJARATI_SCRIPT, TestUtility::GURMUKHI_SCRIPT);
|
|
test.test("[:GUJARATI:]", "[:GURMUKHI:]",
|
|
"[\\u0a5c\\u0a72\\u0a73\\u0a74\\u0a8b\\u0a8d\\u0a91\\u0abd]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GURMUKHI-GUJARATI",
|
|
TestUtility::GURMUKHI_SCRIPT, TestUtility::GUJARATI_SCRIPT);;
|
|
test1.test( "[:GURMUKHI:]", "[:GUJARATI:]",
|
|
"[\\u0a5c\\u0a72\\u0a73\\u0a74\\u0a8b\\u0a8d\\u0a91\\u0ab7\\u0abd\\u0ad0\\u0ae0]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGurmukhiOriya() {
|
|
RTTest test("ORIYA-GURMUKHI",
|
|
TestUtility::ORIYA_SCRIPT, TestUtility::GURMUKHI_SCRIPT);
|
|
test.test("[:ORIYA:]", "[:GURMUKHI:]",
|
|
"[\\u0B0B\\u0B0C\\u0B37\\u0B3D\\u0B5F\\u0B60\\u0B61\\u0a35\\u0a72\\u0a73\\u0a74]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GURMUKHI-ORIYA",
|
|
TestUtility::GURMUKHI_SCRIPT, TestUtility::ORIYA_SCRIPT);;
|
|
test1.test( "[:GURMUKHI:]", "[:ORIYA:]",
|
|
"[\\u0B0B\\u0B0C\\u0B37\\u0B3D\\u0B5F\\u0B60\\u0B61]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGurmukhiTamil() {
|
|
RTTest test("TAMIL-GURMUKHI",
|
|
TestUtility::TAMIL_SCRIPT, TestUtility::GURMUKHI_SCRIPT);
|
|
test.test("[:TAMIL:]", "[:GURMUKHI:]",
|
|
"[\\u0A16\\u0A17\\u0A18\\u0A1B\\u0A1D\\u0A20\\u0A21\\u0A22\\u0A25\\u0A26\\u0A27\\u0A2B\\u0A2C\\u0A2D\\u0A59\\u0A5A\\u0A5B\\u0A5C\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GURMUKHI-TAMIL",
|
|
TestUtility::GURMUKHI_SCRIPT, TestUtility::TAMIL_SCRIPT);;
|
|
test1.test( "[:GURMUKHI:]", "[:TAMIL:]",
|
|
"[\\u0B8E\\u0B92\\u0BA9\\u0BB1\\u0BB4]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGurmukhiTelugu() {
|
|
RTTest test("TELUGU-GURMUKHI",
|
|
TestUtility::TELUGU_SCRIPT, TestUtility::GURMUKHI_SCRIPT);
|
|
test.test("[:TELUGU:]", "[:GURMUKHI:]",
|
|
"[\\u0A59\\u0A5A\\u0A5B\\u0A5C\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GURMUKHI-TELUGU",
|
|
TestUtility::GURMUKHI_SCRIPT, TestUtility::TELUGU_SCRIPT);;
|
|
test1.test( "[:GURMUKHI:]", "[:TELUGU:]",
|
|
"[\\u0C0B\\u0C0C\\u0C0E\\u0C12\\u0C31\\u0C37\\u0C60\\u0C61]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGurmukhiKannada() {
|
|
RTTest test("KANNADA-GURMUKHI",
|
|
TestUtility::KANNADA_SCRIPT, TestUtility::GURMUKHI_SCRIPT);
|
|
test.test("[:KANNADA:]", "[:GURMUKHI:]",
|
|
"[\\u0A59\\u0A5A\\u0A5B\\u0A5C\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GURMUKHI-KANNADA",
|
|
TestUtility::GURMUKHI_SCRIPT, TestUtility::KANNADA_SCRIPT);;
|
|
test1.test( "[:GURMUKHI:]", "[:KANNADA:]",
|
|
"[\\u0C8B\\u0C8C\\u0C8E\\u0C92\\u0CB1\\u0CB7\\u0CE0\\u0CE1]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGurmukhiMalayalam() {
|
|
RTTest test("MALAYALAM-GURMUKHI",
|
|
TestUtility::MALAYALAM_SCRIPT, TestUtility::GURMUKHI_SCRIPT);
|
|
test.test("[:MALAYALAM:]", "[:GURMUKHI:]",
|
|
"[\\u0A59\\u0A5A\\u0A5B\\u0A5C\\u0A5E\\u0A72\\u0A73\\u0A74]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GURMUKHI-MALAYALAM",
|
|
TestUtility::GURMUKHI_SCRIPT, TestUtility::MALAYALAM_SCRIPT);;
|
|
test1.test( "[:GURMUKHI:]", "[:MALAYALAM:]",
|
|
"[\\u0D0B\\u0D0C\\u0D0E\\u0D12\\u0D31\\u0D34\\u0D37\\u0D60\\u0D61]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
|
|
void TransliteratorRoundTripTest::TestGujaratiOriya() {
|
|
RTTest test("GUJARATI-ORIYA",
|
|
TestUtility::GUJARATI_SCRIPT, TestUtility::ORIYA_SCRIPT);
|
|
test.test("[:GUJARATI:]", "[:ORIYA:]",
|
|
"[\\u0B0C\\u0B5F\\u0B61]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("ORIYA-GUJARATI",
|
|
TestUtility::ORIYA_SCRIPT, TestUtility::GUJARATI_SCRIPT);;
|
|
test1.test( "[:ORIYA:]", "[:GUJARATI:]",
|
|
"[\\u0A8D\\u0A91\\u0AB5\\u0Ad0]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGujaratiTamil() {
|
|
RTTest test("TAMIL-GUJARATI",
|
|
TestUtility::TAMIL_SCRIPT, TestUtility::GUJARATI_SCRIPT);
|
|
test.test("[:TAMIL:]", "[:GUJARATI:]",
|
|
"[\\u0A8B\\u0A8D\\u0A91\\u0A96\\u0A97\\u0A98\\u0A9B\\u0A9D\\u0AA0\\u0AA1\\u0AA2\\u0AA5\\u0AA6\\u0AA7\\u0AAB\\u0AAC\\u0AAD\\u0AB6\\u0ABD\\u0AD0\\u0AE0]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GUJARATI-TAMIL",
|
|
TestUtility::GUJARATI_SCRIPT, TestUtility::TAMIL_SCRIPT);;
|
|
test1.test( "[:GUJARATI:]", "[:TAMIL:]",
|
|
"[\\u0B8E\\u0B92\\u0BA9\\u0BB1\\u0BB4]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGujaratiTelugu() {
|
|
RTTest test("TELUGU-GUJARATI",
|
|
TestUtility::TELUGU_SCRIPT, TestUtility::GUJARATI_SCRIPT);
|
|
test.test("[:TELUGU:]", "[:GUJARATI:]",
|
|
"[\\u0A8D\\u0A91\\u0ABD\\u0Ad0]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GUJARATI-TELUGU",
|
|
TestUtility::GUJARATI_SCRIPT, TestUtility::TELUGU_SCRIPT);;
|
|
test1.test( "[:GUJARATI:]", "[:TELUGU:]",
|
|
"[\\u0C0C\\u0C0E\\u0C12\\u0C31\\u0C61]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGujaratiKannada() {
|
|
RTTest test("KANNADA-GUJARATI",
|
|
TestUtility::KANNADA_SCRIPT, TestUtility::GUJARATI_SCRIPT);
|
|
test.test("[:KANNADA:]", "[:GUJARATI:]",
|
|
"[\\u0A8D\\u0A91\\u0ABD\\u0Ad0]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GUJARATI-KANNADA",
|
|
TestUtility::GUJARATI_SCRIPT, TestUtility::KANNADA_SCRIPT);;
|
|
test1.test( "[:GUJARATI:]", "[:KANNADA:]",
|
|
"[\\u0C8C\\u0C8E\\u0C92\\u0CB1\\u0CDE\\u0CE1]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestGujaratiMalayalam() {
|
|
RTTest test("MALAYALAM-GUJARATI",
|
|
TestUtility::MALAYALAM_SCRIPT, TestUtility::GUJARATI_SCRIPT);
|
|
test.test("[:MALAYALAM:]", "[:GUJARATI:]",
|
|
"[\\u0A8D\\u0A91\\u0ABD\\u0Ad0]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("GUJARATI-MALAYALAM",
|
|
TestUtility::GUJARATI_SCRIPT, TestUtility::MALAYALAM_SCRIPT);;
|
|
test1.test( "[:GUJARATI:]", "[:MALAYALAM:]",
|
|
"[\\u0D0C\\u0D0E\\u0D12\\u0D31\\u0D34\\u0D61]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestOriyaTamil() {
|
|
RTTest test("TAMIL-ORIYA",
|
|
TestUtility::TAMIL_SCRIPT, TestUtility::ORIYA_SCRIPT);
|
|
test.test("[:TAMIL:]", "[:ORIYA:]",
|
|
"[\\u0B0B\\u0B0C\\u0B16\\u0B17\\u0B18\\u0B1B\\u0B1D\\u0B20\\u0B21\\u0B22\\u0B25\\u0B26\\u0B27\\u0B2B\\u0B2C\\u0B2D\\u0B36\\u0B3D\\u0B5C\\u0B5D\\u0B5F\\u0B60\\u0B61]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("ORIYA-TAMIL",
|
|
TestUtility::ORIYA_SCRIPT, TestUtility::TAMIL_SCRIPT);;
|
|
test1.test( "[:ORIYA:]", "[:TAMIL:]",
|
|
"[\\u0B8E\\u0B92\\u0BA9\\u0BB1\\u0BB4\\u0BB5]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestOriyaTelugu() {
|
|
RTTest test("TELUGU-ORIYA",
|
|
TestUtility::TELUGU_SCRIPT, TestUtility::ORIYA_SCRIPT);
|
|
test.test("[:TELUGU:]", "[:ORIYA:]",
|
|
"[\\u0B3D\\u0B5C\\u0B5D\\u0B5F]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("ORIYA-TELUGU",
|
|
TestUtility::ORIYA_SCRIPT, TestUtility::TELUGU_SCRIPT);;
|
|
test1.test( "[:ORIYA:]", "[:TELUGU:]",
|
|
"[\\u0C0E\\u0C12\\u0C31\\u0C35]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestOriyaKannada() {
|
|
RTTest test("KANNADA-ORIYA",
|
|
TestUtility::KANNADA_SCRIPT, TestUtility::ORIYA_SCRIPT);
|
|
test.test("[:KANNADA:]", "[:ORIYA:]",
|
|
"[\\u0B3D\\u0B5C\\u0B5D\\u0B5F]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("ORIYA-KANNADA",
|
|
TestUtility::ORIYA_SCRIPT, TestUtility::KANNADA_SCRIPT);;
|
|
test1.test( "[:ORIYA:]", "[:KANNADA:]",
|
|
"[\\u0C8E\\u0C92\\u0CB1\\u0CB5\\u0CDE]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestOriyaMalayalam() {
|
|
RTTest test("MALAYALAM-ORIYA",
|
|
TestUtility::MALAYALAM_SCRIPT, TestUtility::ORIYA_SCRIPT);
|
|
test.test("[:MALAYALAM:]", "[:ORIYA:]",
|
|
"[\\u0B3D\\u0B5C\\u0B5D\\u0B5F]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("ORIYA-MALAYALAM",
|
|
TestUtility::ORIYA_SCRIPT, TestUtility::MALAYALAM_SCRIPT);;
|
|
test1.test( "[:ORIYA:]", "[:MALAYALAM:]",
|
|
"[\\u0D0E\\u0D12\\u0D31\\u0D34\\u0D35]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
|
|
void TransliteratorRoundTripTest::TestTamilTelugu() {
|
|
RTTest test("TELUGU-TAMIL",
|
|
TestUtility::TELUGU_SCRIPT, TestUtility::TAMIL_SCRIPT);
|
|
test.test("[:TELUGU:]", "[:TAMIL:]",
|
|
"[\\u0ba9\\u0bb4]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("TAMIL-TELUGU",
|
|
TestUtility::TAMIL_SCRIPT, TestUtility::TELUGU_SCRIPT);;
|
|
test1.test( "[:TAMIL:]", "[:TELUGU:]",
|
|
"[\\u0C0B\\u0C0C\\u0C16\\u0C17\\u0C18\\u0C1B\\u0C1D\\u0C20\\u0C21\\u0C22\\u0C25\\u0C26\\u0C27\\u0C2B\\u0C2C\\u0C2D\\u0C36\\u0C60\\u0C61]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestTamilKannada() {
|
|
RTTest test("KANNADA-TAMIL",
|
|
TestUtility::KANNADA_SCRIPT, TestUtility::TAMIL_SCRIPT);
|
|
test.test("[:KANNADA:]", "[:TAMIL:]",
|
|
"[\\u0ba9\\u0bb4]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("TAMIL-KANNADA",
|
|
TestUtility::TAMIL_SCRIPT, TestUtility::KANNADA_SCRIPT);;
|
|
test1.test( "[:TAMIL:]", "[:KANNADA:]",
|
|
"[\\u0C8B\\u0C8C\\u0C96\\u0C97\\u0C98\\u0C9B\\u0C9D\\u0CA0\\u0CA1\\u0CA2\\u0CA5\\u0CA6\\u0CA7\\u0CAB\\u0CAC\\u0CAD\\u0CB6\\u0CDE\\u0CE0\\u0CE1]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestTamilMalayalam() {
|
|
RTTest test("MALAYALAM-TAMIL",
|
|
TestUtility::MALAYALAM_SCRIPT, TestUtility::TAMIL_SCRIPT);
|
|
test.test("[:MALAYALAM:]", "[:TAMIL:]",
|
|
"[\\u0ba9]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("TAMIL-MALAYALAM",
|
|
TestUtility::TAMIL_SCRIPT, TestUtility::MALAYALAM_SCRIPT);;
|
|
test1.test( "[:TAMIL:]", "[:MALAYALAM:]",
|
|
"[\\u0D0B\\u0D0C\\u0D16\\u0D17\\u0D18\\u0D1B\\u0D1D\\u0D20\\u0D21\\u0D22\\u0D25\\u0D26\\u0D27\\u0D2B\\u0D2C\\u0D2D\\u0D36\\u0D60\\u0D61]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestTeluguKannada() {
|
|
RTTest test("KANNADA-TELUGU",
|
|
TestUtility::KANNADA_SCRIPT, TestUtility::TELUGU_SCRIPT);
|
|
test.test("[:KANNADA:]", "[:TELUGU:]",
|
|
"[]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("TELUGU-KANNADA",
|
|
TestUtility::TELUGU_SCRIPT, TestUtility::KANNADA_SCRIPT);;
|
|
test1.test( "[:TELUGU:]", "[:KANNADA:]",
|
|
"[\\u0CDE]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
void TransliteratorRoundTripTest::TestTeluguMalayalam() {
|
|
RTTest test("MALAYALAM-TELUGU",
|
|
TestUtility::MALAYALAM_SCRIPT, TestUtility::TELUGU_SCRIPT);
|
|
test.test("[:MALAYALAM:]", "[:TELUGU:]",
|
|
"[]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("TELUGU-MALAYALAM",
|
|
TestUtility::TELUGU_SCRIPT, TestUtility::MALAYALAM_SCRIPT);;
|
|
test1.test( "[:TELUGU:]", "[:MALAYALAM:]",
|
|
"[\\u0D34]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
|
|
void TransliteratorRoundTripTest::TestKannadaMalayalam() {
|
|
RTTest test("MALAYALAM-KANNADA",
|
|
TestUtility::MALAYALAM_SCRIPT, TestUtility::KANNADA_SCRIPT);
|
|
test.test("[:MALAYALAM:]", "[:KANNADA:]",
|
|
"[\\u0cDe]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
RTTest test1("KANNADA-MALAYALAM",
|
|
TestUtility::KANNADA_SCRIPT, TestUtility::MALAYALAM_SCRIPT);
|
|
test1.test( "[:KANNADA:]", "[:MALAYALAM:]",
|
|
"[\\u0D34]", /*roundtrip exclusions*/
|
|
this, new Legal());
|
|
}
|
|
|
|
//---------------
|
|
// End Indic
|
|
//---------------
|
|
|
|
void RTTest::test2() {
|
|
|
|
UChar c;
|
|
UnicodeString cs, targ, reverse;
|
|
int8_t *type = new int8_t[0xFFFF];
|
|
UParseError parseError;
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
Transliterator* sourceToTarget = Transliterator::createInstance(transliteratorID, UTRANS_FORWARD, parseError, status);
|
|
if (sourceToTarget == NULL) {
|
|
log->errln("Fail: createInstance(" + transliteratorID +
|
|
") returned NULL");
|
|
return;
|
|
}
|
|
Transliterator* targetToSource = sourceToTarget->createInverse(status);
|
|
if (targetToSource == NULL) {
|
|
log->errln("Fail: " + transliteratorID +
|
|
".createInverse() returned NULL");
|
|
delete sourceToTarget;
|
|
return;
|
|
}
|
|
|
|
log->logln("Initializing type array");
|
|
|
|
for (c = 0; c < 0xFFFF; ++c) {
|
|
type[c] = u_charType(c);
|
|
}
|
|
|
|
BitSet failSourceTarg;
|
|
|
|
log->logln("Checking that all source characters convert to target - Singles");
|
|
|
|
for (c = 0; c < 0xFFFF; ++c) {
|
|
if (type[c] == U_UNASSIGNED || !isSource(c))
|
|
continue;
|
|
cs.remove();
|
|
cs.append(c);
|
|
targ = cs;
|
|
sourceToTarget->transliterate(targ);
|
|
if (!isReceivingTarget(targ) || includesSome(badCharacters, targ)) {
|
|
logWrongScript("Source-Target", cs, targ);
|
|
failSourceTarg.set(c);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
} else {
|
|
UnicodeString cs2;
|
|
UErrorCode ec = U_ZERO_ERROR;
|
|
Normalizer::decompose(cs, FALSE, 0, cs2, ec);
|
|
UnicodeString targ2 = cs2;
|
|
sourceToTarget->transliterate(targ2);
|
|
if (targ != targ2) {
|
|
logNotCanonical("Source-Target", cs, targ, targ2);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
log->logln("Checking that all source characters convert to target - Doubles");
|
|
|
|
for (c = 0; c < 0xFFFF; ++c) {
|
|
if (type[c] == U_UNASSIGNED ||
|
|
!isSource(c)) continue;
|
|
if (failSourceTarg.get(c)) continue;
|
|
|
|
for (UChar d = 0; d < 0xFFFF; ++d) {
|
|
if (type[d] == U_UNASSIGNED || !isSource(d))
|
|
continue;
|
|
if (failSourceTarg.get(d)) continue;
|
|
|
|
cs.remove();
|
|
cs.append(c).append(d);
|
|
targ = cs;
|
|
sourceToTarget->transliterate(targ);
|
|
if (!isReceivingTarget(targ) || includesSome(badCharacters, targ)) {
|
|
logWrongScript("Source-Target", cs, targ);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
} else {
|
|
UnicodeString cs2;
|
|
UErrorCode ec = U_ZERO_ERROR;
|
|
Normalizer::decompose(cs, FALSE, 0, cs2, ec);
|
|
UnicodeString targ2 = cs2;
|
|
sourceToTarget->transliterate(targ2);
|
|
if (targ != targ2) {
|
|
logNotCanonical("Source-Target", cs, targ, targ2);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
log->logln("Checking that target characters convert to source and back - Singles");
|
|
|
|
BitSet failTargSource;
|
|
BitSet failRound;
|
|
|
|
for (c = 0; c < 0xFFFF; ++c) {
|
|
if (type[c] == U_UNASSIGNED || !isTarget(c))
|
|
continue;
|
|
cs.remove();
|
|
cs.append(c);
|
|
targ = cs;
|
|
targetToSource->transliterate(targ);
|
|
reverse = targ;
|
|
sourceToTarget->transliterate(reverse);
|
|
if (!isReceivingSource(targ) || includesSome(badCharacters, targ)) {
|
|
logWrongScript("Target-Source", cs, targ);
|
|
failTargSource.set(c);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
} else if (!isSame(cs, reverse) && !roundtripExclusions.contains(c)) {
|
|
logRoundTripFailure(cs, targ, reverse);
|
|
failRound.set(c);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
} else {
|
|
UnicodeString targ2;
|
|
UErrorCode ec = U_ZERO_ERROR;
|
|
Normalizer::decompose(targ, FALSE, 0, targ2, ec);
|
|
UnicodeString reverse2 = targ2;
|
|
sourceToTarget->transliterate(reverse2);
|
|
if (reverse != reverse2) {
|
|
logNotCanonical("Target-Source", cs, targ, targ2);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
log->logln("Checking that target characters convert to source and back - Doubles");
|
|
int32_t count = 0;
|
|
cs = UNICODE_STRING("aa", 2);
|
|
for (c = 0; c < 0xFFFF; ++c) {
|
|
if (type[c] == U_UNASSIGNED || !isTarget(c))
|
|
continue;
|
|
if (++count > pairLimit) {
|
|
//throw new TestTruncated("Test truncated at " + pairLimit + " x 64k pairs");
|
|
log->logln("");
|
|
log->logln((UnicodeString)"Test truncated at " + pairLimit + " x 64k pairs");
|
|
return;
|
|
}
|
|
cs.setCharAt(0, c);
|
|
log->log(TestUtility::hex(c));
|
|
for (UChar d = 0; d < 0xFFFF; ++d) {
|
|
if (type[d] == U_UNASSIGNED || !isTarget(d))
|
|
continue;
|
|
cs.setCharAt(1, d);
|
|
targ = cs;
|
|
targetToSource->transliterate(targ);
|
|
reverse = targ;
|
|
sourceToTarget->transliterate(reverse);
|
|
if (!isReceivingSource(targ) && !failTargSource.get(c) && !failTargSource.get(d)
|
|
|| includesSome(badCharacters, targ)) {
|
|
logWrongScript("Target-Source", cs, targ);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
} else if (!isSame(cs, reverse) && !failRound.get(c) && !failRound.get(d)
|
|
&& !roundtripExclusions.contains(c) && !roundtripExclusions.contains(d)) {
|
|
logRoundTripFailure(cs, targ, reverse);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
} else {
|
|
UnicodeString targ2;
|
|
UErrorCode ec = U_ZERO_ERROR;
|
|
Normalizer::decompose(targ, FALSE, 0, targ2, ec);
|
|
UnicodeString reverse2 = targ2;
|
|
sourceToTarget->transliterate(reverse2);
|
|
if (reverse != reverse2) {
|
|
logNotCanonical("Target-Source", cs, targ, targ2);
|
|
if (errorCount >= errorLimit)
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
log->logln("");
|
|
delete []type;
|
|
delete sourceToTarget;
|
|
delete targetToSource;
|
|
}
|
|
|