ICU-89 add test for Tibetan & 3.0 composition exclusions

X-SVN-Rev: 1868
This commit is contained in:
Alan Liu 2000-07-13 22:13:13 +00:00
parent d2afba2116
commit 0410607628
2 changed files with 92 additions and 29 deletions

View File

@ -11,8 +11,33 @@
// Number of elements in a true array (not a pointer).  The argument is
// parenthesized at every use so that any array-valued expression, not just a
// plain identifier, can be passed safely.
#define ARRAY_LENGTH(array) (sizeof (array) / sizeof (*(array)))
// Emits one `case` arm for a test-dispatch switch.  It relies on the
// surrounding function's `name` and `exec` variables: the test's name is
// always stored in `name`, and when `exec` is set the name is logged, an
// empty line is logged, and the test method is invoked.  Follow each use
// with a semicolon; the expansion ends in `break`.
#define CASE(id,test)                 \
    case id:                          \
        name = #test;                 \
        if (exec) {                   \
            logln(#test "---");       \
            logln((UnicodeString)""); \
            test();                   \
        }                             \
        break
// Shared, file-scope error code; passed to the Normalizer::normalize() calls
// in TestCompositionExclusion.
static UErrorCode status = U_ZERO_ERROR;
/**
 * Dispatch hook that maps a zero-based index to one test method.
 *
 * The selected method's name is always stored in `name`; the method itself
 * runs only when `exec` is true (see the CASE macro).  An index with no
 * matching test sets `name` to the empty string.  `par` is not used here.
 */
void BasicNormalizerTest::runIndexedTest(int32_t index, UBool exec,
                                         char* &name, char* par) {
    switch (index) {
        CASE(0, TestDecomp);
        CASE(1, TestCompatDecomp);
        CASE(2, TestCanonCompose);
        CASE(3, TestCompatCompose);
        CASE(4, TestPrevious);
        CASE(5, TestHangulDecomp);
        CASE(6, TestHangulCompose);
        CASE(7, TestTibetan);
        CASE(8, TestCompositionExclusion);
        default:
            name = "";
            break;
    }
}
/**
* Convert Java-style strings with \u Unicode escapes into UnicodeString objects
*/
@ -136,7 +161,7 @@ BasicNormalizerTest::BasicNormalizerTest()
/* Hangul Compatible */
// Input Decomposed Composed
// THIS IS NO LONGER TRUE IN UNICODE v2.1.8, SO THIS TEST IS OBSOLETE
// THIS IS NO LONGER TRUE IN UNICODE v2.1.9, SO THIS TEST IS OBSOLETE
//-obsolete- hangulCompat[0][0] = str("\\ud4db"); hangulCompat[0][1] = str("\\u1111\\u116e\\u1175\\u11af\\u11c2"); hangulCompat[0][2] = str("\\ud478\\u1175\\u11af\\u11c2");
}
@ -207,22 +232,18 @@ void BasicNormalizerTest::TestCompatCompose()
UnicodeString BasicNormalizerTest::hangulCanon[2][3];
//-obsolete-UnicodeString BasicNormalizerTest::hangulCompat[1][3];
void BasicNormalizerTest::TestHangulCompose()
{
// Make sure that the static composition methods work
logln("Canonical composition...");
staticTest(Normalizer::COMPOSE, 0, hangulCanon, ARRAY_LENGTH(hangulCanon), 2);
logln("Compatibility composition...");
//-obsolete- staticTest(Normalizer::COMPOSE_COMPAT, 0, hangulCompat, ARRAY_LENGTH(hangulCompat), 2);
// Now try iterative composition....
logln("Static composition...");
Normalizer* norm = new Normalizer("", Normalizer::COMPOSE, 0);
iterateTest(norm, hangulCanon, ARRAY_LENGTH(hangulCanon), 2);
norm->setMode(Normalizer::COMPOSE_COMPAT);
//-obsolete- iterateTest(norm, hangulCompat, ARRAY_LENGTH(hangulCompat), 2);
// And finally, make sure you can do it in reverse too
logln("Reverse iteration...");
@ -239,14 +260,12 @@ void BasicNormalizerTest::TestHangulDecomp()
logln("Canonical decomposition...");
staticTest(Normalizer::DECOMP, 0, hangulCanon, ARRAY_LENGTH(hangulCanon), 1);
logln("Compatibility decomposition...");
//-obsolete- staticTest(Normalizer::DECOMP_COMPAT, 0, hangulCompat, ARRAY_LENGTH(hangulCompat), 1);
// Now the iterative decomposition methods...
logln("Iterative decomposition...");
Normalizer* norm = new Normalizer("", Normalizer::DECOMP, 0);
iterateTest(norm, hangulCanon, ARRAY_LENGTH(hangulCanon), 1);
norm->setMode(Normalizer::DECOMP_COMPAT);
//-obsolete- iterateTest(norm, hangulCompat, ARRAY_LENGTH(hangulCompat), 1);
// And finally, make sure you can do it in reverse too
logln("Reverse iteration...");
@ -257,6 +276,70 @@ void BasicNormalizerTest::TestHangulDecomp()
delete norm;
}
/**
* The Tibetan vowel sign AA, 0f71, was messed up prior to Unicode version 2.1.9.
* Once 2.1.9 or 3.0 is released, uncomment this test.
*/
void BasicNormalizerTest::TestTibetan(void) {
UnicodeString decomp[1][3];
decomp[0][0] = str("\\u0f77");
decomp[0][1] = str("\\u0f77");
decomp[0][2] = str("\\u0fb2\\u0f71\\u0f80");
UnicodeString compose[1][3];
compose[0][0] = str("\\u0fb2\\u0f71\\u0f80");
compose[0][1] = str("\\u0fb2\\u0f71\\u0f80");
compose[0][2] = str("\\u0fb2\\u0f71\\u0f80");
staticTest(Normalizer::DECOMP, 0, decomp, ARRAY_LENGTH(decomp), 1);
staticTest(Normalizer::DECOMP_COMPAT, 0, decomp, ARRAY_LENGTH(decomp), 2);
staticTest(Normalizer::COMPOSE, 0, compose, ARRAY_LENGTH(compose), 1);
staticTest(Normalizer::COMPOSE_COMPAT, 0, compose, ARRAY_LENGTH(compose), 2);
}
/**
* Make sure characters in the CompositionExclusion.txt list do not get
* composed to.
*/
void BasicNormalizerTest::TestCompositionExclusion(void) {
// This list is generated from CompositionExclusion.txt.
// Update whenever the normalizer tables are updated. Note
// that we test all characters listed, even those that can be
// derived from the Unicode DB and are therefore commented
// out.
UnicodeString EXCLUDED = str(
"\\u0340\\u0341\\u0343\\u0344\\u0374\\u037E\\u0387\\u0958"
"\\u0959\\u095A\\u095B\\u095C\\u095D\\u095E\\u095F\\u09DC"
"\\u09DD\\u09DF\\u0A33\\u0A36\\u0A59\\u0A5A\\u0A5B\\u0A5E"
"\\u0B5C\\u0B5D\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69"
"\\u0F73\\u0F75\\u0F76\\u0F78\\u0F81\\u0F93\\u0F9D\\u0FA2"
"\\u0FA7\\u0FAC\\u0FB9\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79"
"\\u1F7B\\u1F7D\\u1FBB\\u1FBE\\u1FC9\\u1FCB\\u1FD3\\u1FDB"
"\\u1FE3\\u1FEB\\u1FEE\\u1FEF\\u1FF9\\u1FFB\\u1FFD\\u2000"
"\\u2001\\u2126\\u212A\\u212B\\u2329\\u232A\\uF900\\uFA10"
"\\uFA12\\uFA15\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A\\uFB1F"
"\\uFB2A\\uFB2B\\uFB2C\\uFB2D\\uFB2E\\uFB2F\\uFB30\\uFB31"
"\\uFB32\\uFB33\\uFB34\\uFB35\\uFB36\\uFB38\\uFB39\\uFB3A"
"\\uFB3B\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46"
"\\uFB47\\uFB48\\uFB49\\uFB4A\\uFB4B\\uFB4C\\uFB4D\\uFB4E"
);
for (int32_t i=0; i<EXCLUDED.length(); ++i) {
UnicodeString a(EXCLUDED.charAt(i));
UnicodeString b;
UnicodeString c;
Normalizer::normalize(a, Normalizer::DECOMP_COMPAT, 0, b, status);
Normalizer::normalize(b, Normalizer::COMPOSE, 0, c, status);
if (c == a) {
errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
hex(b) + " x COMPOSE => " +
hex(c));
} else if (verbose) {
logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
hex(b) + " x COMPOSE => " +
hex(c));
}
}
}
//------------------------------------------------------------------------
// Internal utilities
@ -360,23 +443,3 @@ void BasicNormalizerTest::assertEqual(const UnicodeString& input,
+ " iterate got " + hex(result) );
}
}
/**
 * Dispatch hook that maps a zero-based index to one test method.
 *
 * A banner is logged whenever `exec` is true.  The selected test's name is
 * always stored in `name`; the test itself runs only when `exec` is true.
 * An index with no matching test sets `name` to the empty string.  `par` is
 * not used here.
 *
 * NOTE(review): a macro-based definition of this same method also appears
 * earlier in this listing; only one definition can remain in the final file.
 */
void BasicNormalizerTest::runIndexedTest(int32_t index, UBool exec, char* &name, char* par)
{
    if (exec)
    {
        // This suite exercises the Normalizer; the previous banner text named
        // the collation regression tests.
        logln("Normalizer Tests: ");
    }
    switch (index)
    {
        case 0: name = "TestDecomp";               if (exec) TestDecomp();               break;
        case 1: name = "TestCompatDecomp";         if (exec) TestCompatDecomp();         break;
        case 2: name = "TestCanonCompose";         if (exec) TestCanonCompose();         break;
        case 3: name = "TestCompatCompose";        if (exec) TestCompatCompose();        break;
        case 4: name = "TestPrevious";             if (exec) TestPrevious();             break;
        case 5: name = "TestHangulDecomp";         if (exec) TestHangulDecomp();         break;
        case 6: name = "TestHangulCompose";        if (exec) TestHangulCompose();        break;
        // Keep in step with the tests declared in the header.
        case 7: name = "TestTibetan";              if (exec) TestTibetan();              break;
        case 8: name = "TestCompositionExclusion"; if (exec) TestCompositionExclusion(); break;
        default: name = ""; break;
    }
}

View File

@ -41,13 +41,13 @@ public:
void TestCompatDecomp(void);
void TestCanonCompose(void);
void TestCompatCompose(void);
void TestTibetan(void);
void TestCompositionExclusion(void);
private:
static UnicodeString canonTests[][3];
static UnicodeString compatTests[][3];
static UnicodeString hangulCanon[][3];
static UnicodeString hangulCompat[][3];
//------------------------------------------------------------------------