From 8f7006eec62b3e7eedfe2a6daefee7fdb1378053 Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Fri, 1 Aug 2003 14:30:29 +0000
Subject: [PATCH] ICU-2449 adjust tests for modified truncated semantics and
 fixes and changes in ucnv_getNextUChar()

X-SVN-Rev: 12726
---
 icu4c/source/test/cintltst/nucnvtst.c | 117 +++++++++++++-------------
 1 file changed, 58 insertions(+), 59 deletions(-)

diff --git a/icu4c/source/test/cintltst/nucnvtst.c b/icu4c/source/test/cintltst/nucnvtst.c
index 5e55d033ad..95b37356a0 100644
--- a/icu4c/source/test/cintltst/nucnvtst.c
+++ b/icu4c/source/test/cintltst/nucnvtst.c
@@ -23,7 +23,7 @@
 #include "unicode/ucol.h"
 #include "cmemory.h"
 
-static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message);
+static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
 #if !UCONFIG_NO_COLLATION
 static void TestJitterbug981(void);
@@ -135,13 +135,13 @@ static void printUSeqErr(const UChar* a, int len)
 }
 
 static void
-TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message)
+TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
 {
      const char* s0;
      const char* s=(char*)source;
-     const uint32_t *r=results;
+     const int32_t *r=results;
      UErrorCode errorCode=U_ZERO_ERROR;
-     uint32_t c;
+     UChar32 c;
 
      while(s<limit) {
         s0=s;
@@ -153,7 +153,7 @@ TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint
             break;
         } else if(
             /* test the expected number of input bytes only if >=0 */
-            (*r>=0 && (uint32_t)(s-s0)!=*r) ||
+            (*r>=0 && (int32_t)(s-s0)!=*r) ||
             c!=*(r+1)
         ) {
             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
@@ -221,12 +221,6 @@ void addTestNewConvert(TestNode** root)
    addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
 
    /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
-#if 0
-   /*
-    * ### TODO results change depending on the ucnv_getNextUChar() implementation
-    * if we go back to the native implementation, then reenable these tests as is
-    * else if we keep the convenience implementation, then modify them first
-    */
    addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
    addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
    addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
@@ -235,16 +229,12 @@ void addTestNewConvert(TestNode** root)
    addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
    addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
    addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
-#endif
 
    addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
    addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
    addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
    addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
-#if 0
-   /* ### TODO figure out how to fix ISO 2022 (see ucnv2022.c) and reenable this test */
    addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
-#endif
    addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
    addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
    addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
@@ -1153,18 +1143,9 @@ static void TestCoverageMBCS(){
         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
 
-        const uint8_t test1input[]    = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
-        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd};
-        int32_t fromtest1Offs[]       = { 0, 1, 2, 3, 3, 4, 5};
-
         /*from Unicode*/
         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
-
-        /*to Unicode*/
-        testConvertToU(test1input, sizeof(test1input),
-            expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", fromtest1Offs ,FALSE);
-
     }
 
     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
@@ -1678,7 +1659,7 @@ static TestUTF7() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         1, 0x48,
         1, 0x2d,
@@ -1722,7 +1703,7 @@ static TestIMAP() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         1, 0x48,
         1, 0x2d,
@@ -1764,7 +1745,7 @@ static TestUTF8() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         1, 0x61,
         2, 0x80,
@@ -1788,7 +1769,7 @@ static TestUTF8() {
     };
 
     /* expected error test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
         /* number of bytes read, code point */
         1, 0x61,
         22, 0x62
@@ -1831,15 +1812,15 @@ static TestCESU8() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         1, 0x61,
         2, 0x80,
         3, 0x800,
         6, 0x10000,
         3, 0xdc01,
-        3, 0xd802,
-        6, 0x10ffff,
+        -1,0xd802,  /* may read 3 or 6 bytes */
+        -1,0x10ffff,/* may read 0 or 3 bytes */
         3, 0xfffc
     };
 
@@ -1860,7 +1841,7 @@ static TestCESU8() {
     };
 
     /* expected error test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
         /* number of bytes read, code point */
         1, 0x61,
         34, 0x62
@@ -1903,17 +1884,17 @@ static TestUTF16() {
     };
 
     /* expected test results */
-    static const uint32_t results1[]={
+    static const int32_t results1[]={
         /* number of bytes read, code point */
         4, 0x4e00,
         2, 0xfeff
     };
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
         /* number of bytes read, code point */
         4, 0x004e,
         2, 0xfffe
     };
-    static const uint32_t results3[]={
+    static const int32_t results3[]={
         /* number of bytes read, code point */
         2, 0xfefe,
         2, 0x4e00,
@@ -1961,7 +1942,7 @@ static TestUTF16BE() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         2, 0x61,
         2, 0xc0,
@@ -1984,13 +1965,23 @@ static TestUTF16BE() {
     /*Test for the condition where there is an invalid character*/
     {
         static const uint8_t source2[]={0x61};
+        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
     }
+#if 0
+    /*
+     * Test disabled because the UTF-16BE/LE converters are currently not
+     * supposed to set errors for unpaired surrogates.
+     * This may change with
+     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
+     */
+
     /*Test for the condition where there is a surrogate pair*/
     {
         const uint8_t source2[]={0xd8, 0x01};
         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
     }
+#endif
     ucnv_close(cnv);
 }
 
@@ -2006,7 +1997,7 @@ TestUTF16LE() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         2, 0x61,
         2, 0x31,
@@ -2028,13 +2019,23 @@ TestUTF16LE() {
     /*Test for the condition where there is an invalid character*/
     {
         static const uint8_t source2[]={0x61};
+        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
     }
+#if 0
+    /*
+     * Test disabled because the UTF-16BE/LE converters are currently not
+     * supposed to set errors for unpaired surrogates.
+     * This may change with
+     * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
+     */
+
     /*Test for the condition where there is a surrogate character*/
     {
         static const uint8_t source2[]={0x01, 0xd8};
         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
     }
+#endif
 
     ucnv_close(cnv);
 }
@@ -2053,17 +2054,17 @@ static TestUTF32() {
     };
 
     /* expected test results */
-    static const uint32_t results1[]={
+    static const int32_t results1[]={
         /* number of bytes read, code point */
         8, 0x100f00,
         4, 0xfeff
     };
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
         /* number of bytes read, code point */
         8, 0x0f1000,
         4, 0xfffe
     };
-    static const uint32_t results3[]={
+    static const int32_t results3[]={
         /* number of bytes read, code point */
         4, 0xfefe,
         4, 0x100f00,
@@ -2112,7 +2113,7 @@ TestUTF32BE() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         4, 0x61,
         4, 0xdc00,
@@ -2135,7 +2136,7 @@ TestUTF32BE() {
     };
 
     /* expected error test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
         /* number of bytes read, code point */
         4,  0x61,
         8,  0x62,
@@ -2181,7 +2182,7 @@ TestUTF32LE() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         4, 0x61,
         4, 0xdc00,
@@ -2204,7 +2205,7 @@ TestUTF32LE() {
     };
 
     /* expected error test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
         /* number of bytes read, code point */
         4,  0x61,
         8,  0x62,
@@ -2249,7 +2250,7 @@ TestLATIN1() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         1, 0x61,
         1, 0x31,
@@ -2369,7 +2370,7 @@ TestSBCS() {
     /* test input */
     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         1, 0x61,
         1, 0xbf,
@@ -2412,7 +2413,7 @@ TestDBCS() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         2, 0x00a7,
         2, 0xe1d2,
@@ -2461,7 +2462,7 @@ TestMBCS() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         1, 0x0001,
         2, 0x250c,
@@ -2503,28 +2504,24 @@ TestISO_2022() {
     /* test input */
     static const uint8_t in[]={
         0x1b, 0x25, 0x42,
-#if 0
         0x31,
         0x32,
         0x61,
         0xc2, 0x80,
         0xe0, 0xa0, 0x80,
-#endif
         0xf0, 0x90, 0x80, 0x80
     };
 
 
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
-#if 0
-        4, 0x0031,
+        4, 0x0031,  /* 4 bytes including the escape sequence */
         1, 0x0032,
         1, 0x61,
         2, 0x80,
         3, 0x800,
-#endif
         4, 0x10000
     };
 
@@ -2545,12 +2542,13 @@ TestISO_2022() {
     /*Test for the condition where we have a truncated char*/
     {
         static const uint8_t source1[]={0xc4};
+        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
     }
     /*Test for the condition where there is an invalid character*/
     {
         static const uint8_t source2[]={0xa1, 0x01};
-        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
+        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
     }
     ucnv_close(cnv);
 }
@@ -4431,7 +4429,7 @@ TestEBCDIC_STATEFUL() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         1, 0x002f,
         1, 0x0092,
@@ -4448,7 +4446,7 @@ TestEBCDIC_STATEFUL() {
     };
 
     /* expected test results */
-    static const uint32_t results2[]={
+    static const int32_t results2[]={
         /* number of bytes read, code point */
         2, 0x203E,
         1, 0x0001,
@@ -4512,7 +4510,7 @@ TestGB18030() {
     };
 
     /* expected test results */
-    static const uint32_t results[]={
+    static const int32_t results[]={
         /* number of bytes read, code point */
         1, 0x24,
         1, 0x7f,
@@ -4966,7 +4964,7 @@ TestLMBCS() {
          }
          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
-         if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
+         if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
          {
             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
          }
@@ -5006,6 +5004,7 @@ TestLMBCS() {
          errorCode = U_ZERO_ERROR;
          pUOut = UOut;
 
+         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
          {