ICU-2449 adjust tests for the modified truncated-character semantics and for the fixes and changes in ucnv_getNextUChar()

X-SVN-Rev: 12726
This commit is contained in:
Markus Scherer 2003-08-01 14:30:29 +00:00
parent 8fcfb9fe32
commit 8f7006eec6

View File

@ -23,7 +23,7 @@
#include "unicode/ucol.h"
#include "cmemory.h"
static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message);
static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
#if !UCONFIG_NO_COLLATION
static void TestJitterbug981(void);
@ -135,13 +135,13 @@ static void printUSeqErr(const UChar* a, int len)
}
static void
TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message)
TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
{
const char* s0;
const char* s=(char*)source;
const uint32_t *r=results;
const int32_t *r=results;
UErrorCode errorCode=U_ZERO_ERROR;
uint32_t c;
UChar32 c;
while(s<limit) {
s0=s;
@ -153,7 +153,7 @@ TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint
break;
} else if(
/* test the expected number of input bytes only if >=0 */
(*r>=0 && (uint32_t)(s-s0)!=*r) ||
(*r>=0 && (int32_t)(s-s0)!=*r) ||
c!=*(r+1)
) {
log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
@ -221,12 +221,6 @@ void addTestNewConvert(TestNode** root)
addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
/* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
#if 0
/*
* ### TODO results change depending on the ucnv_getNextUChar() implementation
* if we go back to the native implementation, then reenable these tests as is
* else if we keep the convenience implementation, then modify them first
*/
addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
@ -235,16 +229,12 @@ void addTestNewConvert(TestNode** root)
addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
#endif
addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
#if 0
/* ### TODO figure out how to fix ISO 2022 (see ucnv2022.c) and reenable this test */
addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
#endif
addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
@ -1153,18 +1143,9 @@ static void TestCoverageMBCS(){
const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd};
int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4, 5};
/*from Unicode*/
testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
/*to Unicode*/
testConvertToU(test1input, sizeof(test1input),
expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", fromtest1Offs ,FALSE);
}
/*some more tests to increase the code coverage in MBCS. Create a test converter from test3.ucm
@ -1678,7 +1659,7 @@ static TestUTF7() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
1, 0x48,
1, 0x2d,
@ -1722,7 +1703,7 @@ static TestIMAP() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
1, 0x48,
1, 0x2d,
@ -1764,7 +1745,7 @@ static TestUTF8() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
1, 0x61,
2, 0x80,
@ -1788,7 +1769,7 @@ static TestUTF8() {
};
/* expected error test results */
static const uint32_t results2[]={
static const int32_t results2[]={
/* number of bytes read, code point */
1, 0x61,
22, 0x62
@ -1831,15 +1812,15 @@ static TestCESU8() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
1, 0x61,
2, 0x80,
3, 0x800,
6, 0x10000,
3, 0xdc01,
3, 0xd802,
6, 0x10ffff,
-1,0xd802, /* may read 3 or 6 bytes */
-1,0x10ffff,/* may read 0 or 3 bytes */
3, 0xfffc
};
@ -1860,7 +1841,7 @@ static TestCESU8() {
};
/* expected error test results */
static const uint32_t results2[]={
static const int32_t results2[]={
/* number of bytes read, code point */
1, 0x61,
34, 0x62
@ -1903,17 +1884,17 @@ static TestUTF16() {
};
/* expected test results */
static const uint32_t results1[]={
static const int32_t results1[]={
/* number of bytes read, code point */
4, 0x4e00,
2, 0xfeff
};
static const uint32_t results2[]={
static const int32_t results2[]={
/* number of bytes read, code point */
4, 0x004e,
2, 0xfffe
};
static const uint32_t results3[]={
static const int32_t results3[]={
/* number of bytes read, code point */
2, 0xfefe,
2, 0x4e00,
@ -1961,7 +1942,7 @@ static TestUTF16BE() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
2, 0x61,
2, 0xc0,
@ -1984,13 +1965,23 @@ static TestUTF16BE() {
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2[]={0x61};
ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
}
#if 0
/*
* Test disabled because currently the UTF-16BE/LE converters are supposed
* to not set errors for unpaired surrogates.
* This may change with
* Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
*/
/*Test for the condition where there is a surrogate pair*/
{
const uint8_t source2[]={0xd8, 0x01};
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
}
#endif
ucnv_close(cnv);
}
@ -2006,7 +1997,7 @@ TestUTF16LE() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
2, 0x61,
2, 0x31,
@ -2028,13 +2019,23 @@ TestUTF16LE() {
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2[]={0x61};
ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
}
#if 0
/*
* Test disabled because currently the UTF-16BE/LE converters are supposed
* to not set errors for unpaired surrogates.
* This may change with
* Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
*/
/*Test for the condition where there is a surrogate character*/
{
static const uint8_t source2[]={0x01, 0xd8};
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
}
#endif
ucnv_close(cnv);
}
@ -2053,17 +2054,17 @@ static TestUTF32() {
};
/* expected test results */
static const uint32_t results1[]={
static const int32_t results1[]={
/* number of bytes read, code point */
8, 0x100f00,
4, 0xfeff
};
static const uint32_t results2[]={
static const int32_t results2[]={
/* number of bytes read, code point */
8, 0x0f1000,
4, 0xfffe
};
static const uint32_t results3[]={
static const int32_t results3[]={
/* number of bytes read, code point */
4, 0xfefe,
4, 0x100f00,
@ -2112,7 +2113,7 @@ TestUTF32BE() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
4, 0x61,
4, 0xdc00,
@ -2135,7 +2136,7 @@ TestUTF32BE() {
};
/* expected error test results */
static const uint32_t results2[]={
static const int32_t results2[]={
/* number of bytes read, code point */
4, 0x61,
8, 0x62,
@ -2181,7 +2182,7 @@ TestUTF32LE() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
4, 0x61,
4, 0xdc00,
@ -2204,7 +2205,7 @@ TestUTF32LE() {
};
/* expected error test results */
static const uint32_t results2[]={
static const int32_t results2[]={
/* number of bytes read, code point */
4, 0x61,
8, 0x62,
@ -2249,7 +2250,7 @@ TestLATIN1() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
1, 0x61,
1, 0x31,
@ -2369,7 +2370,7 @@ TestSBCS() {
/* test input */
static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
1, 0x61,
1, 0xbf,
@ -2412,7 +2413,7 @@ TestDBCS() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
2, 0x00a7,
2, 0xe1d2,
@ -2461,7 +2462,7 @@ TestMBCS() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
1, 0x0001,
2, 0x250c,
@ -2503,28 +2504,24 @@ TestISO_2022() {
/* test input */
static const uint8_t in[]={
0x1b, 0x25, 0x42,
#if 0
0x31,
0x32,
0x61,
0xc2, 0x80,
0xe0, 0xa0, 0x80,
#endif
0xf0, 0x90, 0x80, 0x80
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
#if 0
4, 0x0031,
4, 0x0031, /* 4 bytes including the escape sequence */
1, 0x0032,
1, 0x61,
2, 0x80,
3, 0x800,
#endif
4, 0x10000
};
@ -2545,12 +2542,13 @@ TestISO_2022() {
/*Test for the condition where we have a truncated char*/
{
static const uint8_t source1[]={0xc4};
ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
}
/*Test for the condition where there is an invalid character*/
{
static const uint8_t source2[]={0xa1, 0x01};
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
}
ucnv_close(cnv);
}
@ -4431,7 +4429,7 @@ TestEBCDIC_STATEFUL() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
1, 0x002f,
1, 0x0092,
@ -4448,7 +4446,7 @@ TestEBCDIC_STATEFUL() {
};
/* expected test results */
static const uint32_t results2[]={
static const int32_t results2[]={
/* number of bytes read, code point */
2, 0x203E,
1, 0x0001,
@ -4512,7 +4510,7 @@ TestGB18030() {
};
/* expected test results */
static const uint32_t results[]={
static const int32_t results[]={
/* number of bytes read, code point */
1, 0x24,
1, 0x7f,
@ -4966,7 +4964,7 @@ TestLMBCS() {
}
/*0 byte source request - GetNextUChar : error & value == fffe or ffff */
uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
{
log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
}
@ -5006,6 +5004,7 @@ TestLMBCS() {
errorCode = U_ZERO_ERROR;
pUOut = UOut;
ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
{