ICU-2449 adjust tests for modified truncated semantics and fixes and changes in ucnv_getNextUChar()

X-SVN-Rev: 12726
2003-08-01 14:30:29 +00:00 · 2003-08-01 14:30:29 +00:00 · 8f7006eec6
commit 8f7006eec6
parent 8fcfb9fe32
1 changed files with 58 additions and 59 deletions
--- a/icu4c/source/test/cintltst/nucnvtst.c
+++ b/icu4c/source/test/cintltst/nucnvtst.c
@ -23,7 +23,7 @@
 #include "unicode/ucol.h"
 #include "cmemory.h"

-static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message);
+static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
 #if !UCONFIG_NO_COLLATION
 static void TestJitterbug981(void);
@ -135,13 +135,13 @@ static void printUSeqErr(const UChar* a, int len)
 }

 static void
-TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message)
+TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
 {
     const char* s0;
     const char* s=(char*)source;
-     const uint32_t *r=results;
+     const int32_t *r=results;
     UErrorCode errorCode=U_ZERO_ERROR;
-     uint32_t c;
+     UChar32 c;

     while(s<limit) {
        s0=s;
@ -153,7 +153,7 @@ TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint
            break;
        } else if(
            /* test the expected number of input bytes only if >=0 */
-            (*r>=0 && (uint32_t)(s-s0)!=*r) ||
+            (*r>=0 && (int32_t)(s-s0)!=*r) ||
            c!=*(r+1)
        ) {
            log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
@ -221,12 +221,6 @@ void addTestNewConvert(TestNode** root)
   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");

   /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
-#if 0
-   /*
-    * ### TODO results change depending on the ucnv_getNextUChar() implementation
-    * if we go back to the native implementation, then reenable these tests as is
-    * else if we keep the convenience implementation, then modify them first
-    */
   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
   addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
@ -235,16 +229,12 @@ void addTestNewConvert(TestNode** root)
   addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
   addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
   addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
-#endif

   addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
   addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
   addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
   addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
-#if 0
-   /* ### TODO figure out how to fix ISO 2022 (see ucnv2022.c) and reenable this test */
   addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
-#endif
   addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
   addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
   addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
@ -1153,18 +1143,9 @@ static void TestCoverageMBCS(){
        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
        int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };

-        const uint8_t test1input[]    = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
-        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd};
-        int32_t fromtest1Offs[]       = { 0, 1, 2, 3, 3, 4, 5};
-
        /*from Unicode*/
        testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
            expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
-
-        /*to Unicode*/
-        testConvertToU(test1input, sizeof(test1input),
-            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", fromtest1Offs ,FALSE);
-
    }

    /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
@ -1678,7 +1659,7 @@ static TestUTF7() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x48,
        1, 0x2d,
@ -1722,7 +1703,7 @@ static TestIMAP() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x48,
        1, 0x2d,
@ -1764,7 +1745,7 @@ static TestUTF8() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x61,
        2, 0x80,
@ -1788,7 +1769,7 @@ static TestUTF8() {
    };

    /* expected error test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
        /* number of bytes read, code point */
        1, 0x61,
        22, 0x62
@ -1831,15 +1812,15 @@ static TestCESU8() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x61,
        2, 0x80,
        3, 0x800,
        6, 0x10000,
        3, 0xdc01,
-        3, 0xd802,
-        6, 0x10ffff,
+        -1,0xd802,  /* may read 3 or 6 bytes */
+        -1,0x10ffff,/* may read 0 or 3 bytes */
        3, 0xfffc
    };

@ -1860,7 +1841,7 @@ static TestCESU8() {
    };

    /* expected error test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
        /* number of bytes read, code point */
        1, 0x61,
        34, 0x62
@ -1903,17 +1884,17 @@ static TestUTF16() {
    };

    /* expected test results */
-    static const uint32_t results1[]={
+    static const int32_t results1[]={
        /* number of bytes read, code point */
        4, 0x4e00,
        2, 0xfeff
    };
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
        /* number of bytes read, code point */
        4, 0x004e,
        2, 0xfffe
    };
-    static const uint32_t results3[]={
+    static const int32_t results3[]={
        /* number of bytes read, code point */
        2, 0xfefe,
        2, 0x4e00,
@ -1961,7 +1942,7 @@ static TestUTF16BE() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        2, 0x61,
        2, 0xc0,
@ -1984,13 +1965,23 @@ static TestUTF16BE() {
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0x61};
+        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
    }
+#if 0
+    /*
+     * Test disabled because currently the UTF-16BE/LE converters are supposed
+     * to not set errors for unpaired surrogates.
+     * This may change with
+     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
+     */
+
    /*Test for the condition where there is a surrogate pair*/
    {
        const uint8_t source2[]={0xd8, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
    }
+#endif
    ucnv_close(cnv);
 }

@ -2006,7 +1997,7 @@ TestUTF16LE() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        2, 0x61,
        2, 0x31,
@ -2028,13 +2019,23 @@ TestUTF16LE() {
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0x61};
+        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
    }
+#if 0
+    /*
+     * Test disabled because currently the UTF-16BE/LE converters are supposed
+     * to not set errors for unpaired surrogates.
+     * This may change with
+     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
+     */
+
    /*Test for the condition where there is a surrogate character*/
    {
        static const uint8_t source2[]={0x01, 0xd8};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
    }
+#endif

    ucnv_close(cnv);
 }
@ -2053,17 +2054,17 @@ static TestUTF32() {
    };

    /* expected test results */
-    static const uint32_t results1[]={
+    static const int32_t results1[]={
        /* number of bytes read, code point */
        8, 0x100f00,
        4, 0xfeff
    };
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
        /* number of bytes read, code point */
        8, 0x0f1000,
        4, 0xfffe
    };
-    static const uint32_t results3[]={
+    static const int32_t results3[]={
        /* number of bytes read, code point */
        4, 0xfefe,
        4, 0x100f00,
@ -2112,7 +2113,7 @@ TestUTF32BE() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x61,
        4, 0xdc00,
@ -2135,7 +2136,7 @@ TestUTF32BE() {
    };

    /* expected error test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
        /* number of bytes read, code point */
        4,  0x61,
        8,  0x62,
@ -2181,7 +2182,7 @@ TestUTF32LE() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x61,
        4, 0xdc00,
@ -2204,7 +2205,7 @@ TestUTF32LE() {
    };

    /* expected error test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
        /* number of bytes read, code point */
        4,  0x61,
        8,  0x62,
@ -2249,7 +2250,7 @@ TestLATIN1() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x61,
        1, 0x31,
@ -2369,7 +2370,7 @@ TestSBCS() {
    /* test input */
    static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x61,
        1, 0xbf,
@ -2412,7 +2413,7 @@ TestDBCS() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        2, 0x00a7,
        2, 0xe1d2,
@ -2461,7 +2462,7 @@ TestMBCS() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x0001,
        2, 0x250c,
@ -2503,28 +2504,24 @@ TestISO_2022() {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
-#if 0
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
-#endif
        0xf0, 0x90, 0x80, 0x80
    };



    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
-#if 0
-        4, 0x0031,
+        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
-#endif
        4, 0x10000
    };

@ -2545,12 +2542,13 @@ TestISO_2022() {
    /*Test for the condition where we have a truncated char*/
    {
        static const uint8_t source1[]={0xc4};
+        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0xa1, 0x01};
-        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
+        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
 }
@ -4431,7 +4429,7 @@ TestEBCDIC_STATEFUL() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x002f,
        1, 0x0092,
@ -4448,7 +4446,7 @@ TestEBCDIC_STATEFUL() {
    };

    /* expected test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
        /* number of bytes read, code point */
        2, 0x203E,
        1, 0x0001,
@ -4512,7 +4510,7 @@ TestGB18030() {
    };

    /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x24,
        1, 0x7f,
@ -4966,7 +4964,7 @@ TestLMBCS() {
         }
         /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
         uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
-         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
+         if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
         {
            log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
         }
@ -5006,6 +5004,7 @@ TestLMBCS() {
         errorCode = U_ZERO_ERROR;
         pUOut = UOut;

+         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
         ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
         if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
         {