From 8f7006eec62b3e7eedfe2a6daefee7fdb1378053 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 1 Aug 2003 14:30:29 +0000 Subject: [PATCH] ICU-2449 adjust tests for modified truncated semantics and fixes and changes in ucnv_getNextUChar() X-SVN-Rev: 12726 --- icu4c/source/test/cintltst/nucnvtst.c | 117 +++++++++++++------------- 1 file changed, 58 insertions(+), 59 deletions(-) diff --git a/icu4c/source/test/cintltst/nucnvtst.c b/icu4c/source/test/cintltst/nucnvtst.c index 5e55d033ad..95b37356a0 100644 --- a/icu4c/source/test/cintltst/nucnvtst.c +++ b/icu4c/source/test/cintltst/nucnvtst.c @@ -23,7 +23,7 @@ #include "unicode/ucol.h" #include "cmemory.h" -static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message); +static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); #if !UCONFIG_NO_COLLATION static void TestJitterbug981(void); @@ -135,13 +135,13 @@ static void printUSeqErr(const UChar* a, int len) } static void -TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message) +TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) { const char* s0; const char* s=(char*)source; - const uint32_t *r=results; + const int32_t *r=results; UErrorCode errorCode=U_ZERO_ERROR; - uint32_t c; + UChar32 c; while(s=0 */ - (*r>=0 && (uint32_t)(s-s0)!=*r) || + (*r>=0 && (int32_t)(s-s0)!=*r) || c!=*(r+1) ) { log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", @@ -221,12 +221,6 @@ void addTestNewConvert(TestNode** root) addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ -#if 0 - /* - * ### TODO results change depending on the ucnv_getNextUChar() implementation - * if we go back to the native implementation, then reenable these tests as is - * else if we keep the convenience implementation, then modify them first - */ addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); @@ -235,16 +229,12 @@ void addTestNewConvert(TestNode** root) addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); -#endif addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); -#if 0 - /* ### TODO figure out how to fix ISO 2022 (see ucnv2022.c) and reenable this test */ addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); -#endif addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); @@ -1153,18 +1143,9 @@ static void TestCoverageMBCS(){ const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; - const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09}; - const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd}; - int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4, 5}; - /*from Unicode*/ testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); - - /*to Unicode*/ - testConvertToU(test1input, sizeof(test1input), - expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", fromtest1Offs ,FALSE); - } /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm @@ -1678,7 +1659,7 @@ static TestUTF7() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x48, 1, 0x2d, @@ -1722,7 +1703,7 @@ static TestIMAP() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x48, 1, 0x2d, @@ -1764,7 +1745,7 @@ static TestUTF8() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 2, 0x80, @@ -1788,7 +1769,7 @@ static TestUTF8() { }; /* expected error test results */ - static const uint32_t results2[]={ + static const int32_t results2[]={ /* number of bytes read, code point */ 1, 0x61, 22, 0x62 @@ -1831,15 +1812,15 @@ static TestCESU8() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 2, 0x80, 3, 0x800, 6, 0x10000, 3, 0xdc01, - 3, 0xd802, - 6, 0x10ffff, + -1,0xd802, /* may read 3 or 6 bytes */ + -1,0x10ffff,/* may read 0 or 3 bytes */ 3, 0xfffc }; @@ -1860,7 +1841,7 @@ static TestCESU8() { }; /* expected error test results */ - static const uint32_t results2[]={ + static const int32_t results2[]={ /* number of bytes read, code point */ 1, 0x61, 34, 0x62 @@ -1903,17 +1884,17 @@ static TestUTF16() { }; /* expected test results */ - static const uint32_t results1[]={ + static const int32_t results1[]={ /* number of bytes read, code point */ 4, 0x4e00, 2, 0xfeff }; - static const uint32_t results2[]={ + static const int32_t results2[]={ /* number of bytes read, code point */ 4, 0x004e, 2, 0xfffe }; - static const uint32_t results3[]={ + static const int32_t results3[]={ /* number of bytes read, code point */ 2, 0xfefe, 2, 0x4e00, @@ -1961,7 +1942,7 @@ static TestUTF16BE() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 2, 0x61, 2, 0xc0, @@ -1984,13 +1965,23 @@ static TestUTF16BE() { /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0x61}; + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); } +#if 0 + /* + * Test disabled because currently the UTF-16BE/LE converters are supposed + * to not set errors for unpaired surrogates. + * This may change with + * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 + */ + /*Test for the condition where there is a surrogate pair*/ { const uint8_t source2[]={0xd8, 0x01}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); } +#endif ucnv_close(cnv); } @@ -2006,7 +1997,7 @@ TestUTF16LE() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 2, 0x61, 2, 0x31, @@ -2028,13 +2019,23 @@ TestUTF16LE() { /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0x61}; + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); } +#if 0 + /* + * Test disabled because currently the UTF-16BE/LE converters are supposed + * to not set errors for unpaired surrogates. + * This may change with + * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 + */ + /*Test for the condition where there is a surrogate character*/ { static const uint8_t source2[]={0x01, 0xd8}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); } +#endif ucnv_close(cnv); } @@ -2053,17 +2054,17 @@ static TestUTF32() { }; /* expected test results */ - static const uint32_t results1[]={ + static const int32_t results1[]={ /* number of bytes read, code point */ 8, 0x100f00, 4, 0xfeff }; - static const uint32_t results2[]={ + static const int32_t results2[]={ /* number of bytes read, code point */ 8, 0x0f1000, 4, 0xfffe }; - static const uint32_t results3[]={ + static const int32_t results3[]={ /* number of bytes read, code point */ 4, 0xfefe, 4, 0x100f00, @@ -2112,7 +2113,7 @@ TestUTF32BE() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 4, 0x61, 4, 0xdc00, @@ -2135,7 +2136,7 @@ TestUTF32BE() { }; /* expected error test results */ - static const uint32_t results2[]={ + static const int32_t results2[]={ /* number of bytes read, code point */ 4, 0x61, 8, 0x62, @@ -2181,7 +2182,7 @@ TestUTF32LE() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 4, 0x61, 4, 0xdc00, @@ -2204,7 +2205,7 @@ TestUTF32LE() { }; /* expected error test results */ - static const uint32_t results2[]={ + static const int32_t results2[]={ /* number of bytes read, code point */ 4, 0x61, 8, 0x62, @@ -2249,7 +2250,7 @@ TestLATIN1() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 1, 0x31, @@ -2369,7 +2370,7 @@ TestSBCS() { /* test input */ static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 1, 0xbf, @@ -2412,7 +2413,7 @@ TestDBCS() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 2, 0x00a7, 2, 0xe1d2, @@ -2461,7 +2462,7 @@ TestMBCS() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x0001, 2, 0x250c, @@ -2503,28 +2504,24 @@ TestISO_2022() { /* test input */ static const uint8_t in[]={ 0x1b, 0x25, 0x42, -#if 0 0x31, 0x32, 0x61, 0xc2, 0x80, 0xe0, 0xa0, 0x80, -#endif 0xf0, 0x90, 0x80, 0x80 }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ -#if 0 - 4, 0x0031, + 4, 0x0031, /* 4 bytes including the escape sequence */ 1, 0x0032, 1, 0x61, 2, 0x80, 3, 0x800, -#endif 4, 0x10000 }; @@ -2545,12 +2542,13 @@ TestISO_2022() { /*Test for the condition where we have a truncated char*/ { static const uint8_t source1[]={0xc4}; + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); } /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0xa1, 0x01}; - TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); + TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); } ucnv_close(cnv); } @@ -4431,7 +4429,7 @@ TestEBCDIC_STATEFUL() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x002f, 1, 0x0092, @@ -4448,7 +4446,7 @@ TestEBCDIC_STATEFUL() { }; /* expected test results */ - static const uint32_t results2[]={ + static const int32_t results2[]={ /* number of bytes read, code point */ 2, 0x203E, 1, 0x0001, @@ -4512,7 +4510,7 @@ TestGB18030() { }; /* expected test results */ - static const uint32_t results[]={ + static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x24, 1, 0x7f, @@ -4966,7 +4964,7 @@ TestLMBCS() { } /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); - if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) + if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) { log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); } @@ -5006,6 +5004,7 @@ TestLMBCS() { errorCode = U_ZERO_ERROR; pUOut = UOut; + ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) {