From 25ed9f9c742031d423ff89603c026be276bc7389 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Mon, 7 Jun 2004 17:11:07 +0000 Subject: [PATCH] ICU-3786 port Mark's new Han translit tests from Java to C++ X-SVN-Rev: 15755 --- icu4c/source/test/intltest/transrt.cpp | 109 +++++++++++++++++++++++++ icu4c/source/test/intltest/transrt.h | 1 + 2 files changed, 110 insertions(+) diff --git a/icu4c/source/test/intltest/transrt.cpp b/icu4c/source/test/intltest/transrt.cpp index 8e1032f612..b9480556d8 100644 --- a/icu4c/source/test/intltest/transrt.cpp +++ b/icu4c/source/test/intltest/transrt.cpp @@ -22,10 +22,14 @@ #include "unicode/usetiter.h" #include "unicode/putil.h" #include "unicode/uversion.h" +#include "unicode/locid.h" +#include "unicode/ulocdata.h" +#include "unicode/utf8.h" #include "cmemory.h" #include "transrt.h" #include "testutil.h" #include +#include #define CASE(id,test) case id: \ name = #test; \ @@ -68,6 +72,7 @@ TransliteratorRoundTripTest::runIndexedTest(int32_t index, UBool exec, CASE(9,TestInterIndic); CASE(10, TestHebrew); CASE(11, TestArabic); + CASE(12, TestHan); default: name = ""; break; } } @@ -1027,6 +1032,110 @@ void TransliteratorRoundTripTest::TestHangul() { delete legal; } + +#define ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ + errln("error at file %s, line %d, status = %s", __FILE__, __LINE__, \ + u_errorName(status)); \ + return;}} + + +static void writeStringInU8(FILE *out, const UnicodeString &s) { + int i; + for (i=0; icharAt(i); + if (c == (UChar32)-1) { + break; + } + source.append(c); + } + + // transform with Han translit + Transliterator *hanTL = Transliterator::createInstance("Han-Latin", UTRANS_FORWARD, status); + ASSERT_SUCCESS(status); + UnicodeString target=source; + hanTL->transliterate(target); + // now verify that there are no Han characters left + UnicodeSet allHan("[:han:]", status); + ASSERT_SUCCESS(status); + if (allHan.containsSome(target)) { + errln("file %s, line %d, No Han must be left after Han-Latin transliteration", + __FILE__, __LINE__); + } + + // check the pinyin translit + Transliterator *pn = Transliterator::createInstance("Latin-NumericPinyin", UTRANS_FORWARD, status); + ASSERT_SUCCESS(status); + UnicodeString target2 = target; + pn->transliterate(target2); + + // verify that there are no marks + Transliterator *nfc = Transliterator::createInstance("nfc", UTRANS_FORWARD, status); + ASSERT_SUCCESS(status); + + UnicodeString nfced = target2; + nfc->transliterate(nfced); + UnicodeSet allMarks("[:mark:]", status); + ASSERT_SUCCESS(status); + assertFalse("NumericPinyin must contain no marks", allMarks.containsSome(nfced)); + + // verify roundtrip + Transliterator *np = pn->createInverse(status); + ASSERT_SUCCESS(status); + UnicodeString target3 = target; + np->transliterate(target3); + UBool roundtripOK = (target3.compare(target) == 0); + assertTrue("NumericPinyin must roundtrip", roundtripOK); + if (!roundtripOK) { + const char *filename = "numeric-pinyin.log.txt"; + FILE *out = fopen(filename, "w"); + errln("Creating log file %s\n", filename); + fprintf(out, "Pinyin: "); + writeStringInU8(out, target); + fprintf(out, "\nPinyin-Numeric-Pinyin: "); + writeStringInU8(out, target2); + fprintf(out, "\n"); + fclose(out); + } + + delete hanTL; + delete pn; + delete nfc; + delete np; + uset_close(USetExemplars); +} + + void TransliteratorRoundTripTest::TestGreek() { // weiv removed the test and the fiter /* diff --git a/icu4c/source/test/intltest/transrt.h b/icu4c/source/test/intltest/transrt.h index f85e5aabe1..196ad7d97b 100644 --- a/icu4c/source/test/intltest/transrt.h +++ b/icu4c/source/test/intltest/transrt.h @@ -31,6 +31,7 @@ class TransliteratorRoundTripTest : public IntlTest { void TestKatakana(void); void TestJamo(void); void TestHangul(void); + void TestHan(void); void TestGreek(void); void TestGreekUNGEGN(void); void Testel(void);