/******************************************************************** * COPYRIGHT: * Copyright (c) 1997-2011, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /******************************************************************************* * * File CCONVTST.C * * Modification History: * Name Description * Steven R. Loomis 7/8/1999 Adding input buffer test ******************************************************************************** */ #include #include "cstring.h" #include "unicode/uloc.h" #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" #include "unicode/ucnv_cb.h" #include "cintltst.h" #include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/ucol.h" #include "unicode/utf16.h" #include "cmemory.h" #include "nucnvtst.h" #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); #if !UCONFIG_NO_COLLATION static void TestJitterbug981(void); #endif static void TestJitterbug1293(void); static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; static void TestConverterTypesAndStarters(void); static void TestAmbiguous(void); static void TestSignatureDetection(void); static void TestUTF7(void); static void TestIMAP(void); static void TestUTF8(void); static void TestCESU8(void); static void TestUTF16(void); static void TestUTF16BE(void); static void TestUTF16LE(void); static void TestUTF32(void); static void TestUTF32BE(void); static void TestUTF32LE(void); static void TestLATIN1(void); #if !UCONFIG_NO_LEGACY_CONVERSION static void TestSBCS(void); static void TestDBCS(void); static void TestMBCS(void); #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO static void TestICCRunout(void); #endif #ifdef U_ENABLE_GENERIC_ISO_2022 static void TestISO_2022(void); #endif static void TestISO_2022_JP(void); static void TestISO_2022_JP_1(void); static void TestISO_2022_JP_2(void); static void TestISO_2022_KR(void); static void TestISO_2022_KR_1(void); static void TestISO_2022_CN(void); #if 0 /* * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 */ static void TestISO_2022_CN_EXT(void); #endif static void TestJIS(void); static void TestHZ(void); #endif static void TestSCSU(void); #if !UCONFIG_NO_LEGACY_CONVERSION static void TestEBCDIC_STATEFUL(void); static void TestGB18030(void); static void TestLMBCS(void); static void TestJitterbug255(void); static void TestEBCDICUS4XML(void); #if 0 /* * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 */ static void TestJitterbug915(void); #endif static void TestISCII(void); static void TestCoverageMBCS(void); static void TestJitterbug2346(void); static void TestJitterbug2411(void); static void TestJB5275(void); static void TestJB5275_1(void); static void TestJitterbug6175(void); static void TestIsFixedWidth(void); #endif static void TestInBufSizes(void); static void TestRoundTrippingAllUTF(void); static void TestConv(const uint16_t in[], int len, const char* conv, const char* lang, char byteArr[], int byteArrLen); /* open a converter, using test data if it begins with '@' */ static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); #define NEW_MAX_BUFFER 999 static int32_t gInBufferSize = NEW_MAX_BUFFER; static int32_t gOutBufferSize = NEW_MAX_BUFFER; static char gNuConvTestName[1024]; #define nct_min(x,y) ((x=0 */ (*r>=0 && (int32_t)(s-s0)!=*r) || c!=*(r+1) ) { log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", message, c, (s-s0), *(r+1), *r); break; } r+=2; } } static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) { const char* s=(char*)source; UErrorCode errorCode=U_ZERO_ERROR; uint32_t c; c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); if(errorCode != expected){ log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); } if(c != 0xFFFD && c != 0xffff){ log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); } } static void TestInBufSizes(void) { TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); #if 1 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); TestNewConvertWithBufferSizes(1,1); TestNewConvertWithBufferSizes(2,3); TestNewConvertWithBufferSizes(3,2); #endif } static void TestOutBufSizes(void) { #if 1 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); #endif } void addTestNewConvert(TestNode** root) { #if !UCONFIG_NO_FILE_IO addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); #endif addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); #if !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); #endif addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); #if !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); #if !UCONFIG_NO_FILE_IO addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); #endif addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); #ifdef U_ENABLE_GENERIC_ISO_2022 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); #endif addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); /* * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); */ addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); #endif addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); #if !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); #if !UCONFIG_NO_COLLATION addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); #endif addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); #endif #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); #endif addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); #if !UCONFIG_NO_LEGACY_CONVERSION addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); #endif } /* Note that this test already makes use of statics, so it's not really multithread safe. This convenience function lets us make the error messages actually useful. */ static void setNuConvTestName(const char *codepage, const char *direction) { sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", codepage, direction, (int)gInBufferSize, (int)gOutBufferSize); } typedef enum { TC_OK = 0, /* test was OK */ TC_MISMATCH = 1, /* Match failed - err was printed */ TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ } ETestConvertResult; /* Note: This function uses global variables and it will not do offset checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, const char *codepage, const int32_t *expectOffsets , UBool useFallback) { UErrorCode status = U_ZERO_ERROR; UConverter *conv = 0; char junkout[NEW_MAX_BUFFER]; /* FIX */ int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ char *p; const UChar *src; char *end; char *targ; int32_t *offs; int i; int32_t realBufferSize; char *realBufferEnd; const UChar *realSourceEnd; const UChar *sourceLimit; UBool checkOffsets = TRUE; UBool doFlush; for(i=0;i %d chars out]. \nResult :", sourceLen, targ-junkout); if(getTestOption(VERBOSITY_OPTION)) { char junk[9999]; char offset_str[9999]; char *ptr; junk[0] = 0; offset_str[0] = 0; for(ptr = junkout;ptr%s\n", gNuConvTestName); printUSeqErr(source, sourceLen); fprintf(stderr, "Got:\n"); printSeqErr((const unsigned char *)junkout, expectLen); fprintf(stderr, "Expected:\n"); printSeqErr((const unsigned char *)expect, expectLen); return TC_MISMATCH; } } /* Note: This function uses global variables and it will not do offset checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, const char *codepage, const int32_t *expectOffsets, UBool useFallback) { UErrorCode status = U_ZERO_ERROR; UConverter *conv = 0; UChar junkout[NEW_MAX_BUFFER]; /* FIX */ int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ const char *src; const char *realSourceEnd; const char *srcLimit; UChar *p; UChar *targ; UChar *end; int32_t *offs; int i; UBool checkOffsets = TRUE; int32_t realBufferSize; UChar *realBufferEnd; for(i=0;i %d chars.\nResult :", sourcelen, targ-junkout); if(getTestOption(VERBOSITY_OPTION)) { char junk[9999]; char offset_str[9999]; UChar *ptr; junk[0] = 0; offset_str[0] = 0; for(ptr = junkout;ptr h1 h2 h3 . EBCDIC_STATEFUL */ static const uint8_t expectedIBM930[] = { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; static const int32_t toIBM930Offs[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; static const int32_t fmIBM930Offs[] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; /* 1 2 3 0 h1 h2 h3 . MBCS*/ static const uint8_t expectedIBM943[] = { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; static const int32_t toIBM943Offs [] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; static const int32_t fmIBM943Offs[] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; /* 1 2 3 0 h1 h2 h3 . DBCS*/ static const uint8_t expectedIBM9027[] = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; static const int32_t toIBM9027Offs [] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; /* 1 2 3 0 . SBCS*/ static const uint8_t expectedIBM920[] = { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; static const int32_t toIBM920Offs [] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; /* 1 2 3 0 . SBCS*/ static const uint8_t expectedISO88593[] = { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; static const int32_t toISO88593Offs[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; /* 1 2 3 0 . LATIN_1*/ static const uint8_t expectedLATIN1[] = { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; static const int32_t toLATIN1Offs[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; /* etc */ static const uint8_t expectedUTF16BE[] = { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; static const int32_t toUTF16BEOffs[]= { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; static const int32_t fmUTF16BEOffs[] = { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; static const uint8_t expectedUTF16LE[] = { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; static const int32_t toUTF16LEOffs[]= { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; static const int32_t fmUTF16LEOffs[] = { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; static const uint8_t expectedUTF32BE[] = { 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x00, 0x00, 0x4e, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x02, 0x00, 0x21 }; static const int32_t toUTF32BEOffs[]= { 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 }; static const int32_t fmUTF32BEOffs[] = { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; static const uint8_t expectedUTF32LE[] = { 0x31, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x8c, 0x4e, 0x00, 0x00, 0x09, 0x4e, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x21, 0x00, 0x02, 0x00 }; static const int32_t toUTF32LEOffs[]= { 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 }; static const int32_t fmUTF32LEOffs[] = { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; /** Test chars #2 **/ /* Sahha [health], slashed h's */ static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; /* LMBCS */ static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; /*********************************** START OF CODE finally *************/ gInBufferSize = insize; gOutBufferSize = outsize; log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); /*UTF-8*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); log_verbose("Test surrogate behaviour for UTF8\n"); { static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 0xf0, 0x90, 0x90, 0x81, 0xef, 0xbf, 0xbd }; static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); } #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) /*ISO-2022*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); #endif /*UTF16 LE*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); /*UTF16 BE*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); /*UTF32 LE*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); /*UTF32 BE*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); /*LATIN_1*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); #if !UCONFIG_NO_LEGACY_CONVERSION /*EBCDIC_STATEFUL*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); /*MBCS*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); /*DBCS*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); /*SBCS*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); /*SBCS*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); #endif /****/ /*UTF-8*/ testConvertToU(expectedUTF8, sizeof(expectedUTF8), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) /*ISO-2022*/ testConvertToU(expectedISO2022, sizeof(expectedISO2022), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); #endif /*UTF16 LE*/ testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); /*UTF16 BE*/ testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); /*UTF32 LE*/ testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); /*UTF32 BE*/ testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); #if !UCONFIG_NO_LEGACY_CONVERSION /*EBCDIC_STATEFUL*/ testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); /*MBCS*/ testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); #endif /* Try it again to make sure it still works */ testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); #if !UCONFIG_NO_LEGACY_CONVERSION testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); /*LMBCS*/ testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); #endif /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ { /* encode directly set D and set O */ static const uint8_t utf7[] = { /* Hi Mom -+Jjo--! A+ImIDkQ. +- +ZeVnLIqe- */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 0x2b, 0x2d, 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d }; static const UChar unicode[] = { /* Hi Mom --! A. + [Japanese word "nihongo"] */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 0x41, 0x2262, 0x0391, 0x2e, 0x2b, 0x65e5, 0x672c, 0x8a9e }; static const int32_t toUnicodeOffsets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 15, 17, 19, 23, 24, 27, 29, 32 }; static const int32_t fromUnicodeOffsets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 13, 13, 14, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 }; /* same but escaping set O (the exclamation mark) */ static const uint8_t utf7Restricted[] = { /* Hi Mom -+Jjo--+ACE- A+ImIDkQ. +- +ZeVnLIqe- */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 0x2b, 0x2d, 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d }; static const int32_t toUnicodeOffsetsR[] = { 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 19, 21, 23, 27, 28, 31, 33, 36 }; static const int32_t fromUnicodeOffsetsR[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 11, 12, 12, 12, 13, 13, 13, 13, 14, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 }; testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); } /* * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, * modified according to RFC 2060, * and supplemented with the one example in RFC 2060 itself. */ { static const uint8_t imap[] = { /* Hi Mom -&Jjo--! A&ImIDkQ-. &- &ZeVnLIqe- \ ~peter /mail /&ZeVnLIqe- /&U,BTFw- */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 0x26, 0x2d, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 0x5c, 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d }; static const UChar unicode[] = { /* Hi Mom --! A. & [Japanese word "nihongo"] \ ~peter /mail /<65e5, 672c, 8a9e> /<53f0, 5317> */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 0x41, 0x2262, 0x0391, 0x2e, 0x26, 0x65e5, 0x672c, 0x8a9e, 0x5c, 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x2f, 0x65e5, 0x672c, 0x8a9e, 0x2f, 0x53f0, 0x5317 }; static const int32_t toUnicodeOffsets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 15, 17, 19, 24, 25, 28, 30, 33, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 53, 56, 60, 62, 64 }; static const int32_t fromUnicodeOffsets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 35, 36, 36, 36, 37, 37, 37, 37, 37 }; testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); } /* Test UTF-8 bad data handling*/ { static const uint8_t utf8[]={ 0x61, 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 0x00, 0x62, 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 0xdf, 0xbf, /* 7ff */ 0xbf, /* truncated tail */ 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 0x02 }; static const uint16_t utf8Expected[]={ 0x0061, 0xfffd, 0x0000, 0x0062, 0xfffd, 0xfffd, 0xdbff, 0xdfff, 0x07ff, 0xfffd, 0xfffd, 0x0002 }; static const int32_t utf8Offsets[]={ 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 }; testConvertToU(utf8, sizeof(utf8), utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); } /* Test UTF-32BE bad data handling*/ { static const uint8_t utf32[]={ 0x00, 0x00, 0x00, 0x61, 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 0x00, 0x00, 0x00, 0x62, 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 0x00, 0x00, 0x01, 0x62, 0x00, 0x00, 0x02, 0x62 }; static const uint16_t utf32Expected[]={ 0x0061, 0xfffd, /* 0x110000 out of range */ 0xDBFF, /* 0x10FFFF in range */ 0xDFFF, 0x0062, 0xfffd, /* 0xffffffff out of range */ 0xfffd, /* 0x7fffffff out of range */ 0x0162, 0x0262 }; static const int32_t utf32Offsets[]={ 0, 4, 8, 8, 12, 16, 20, 24, 28 }; static const uint8_t utf32ExpectedBack[]={ 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 0x00, 0x00, 0x01, 0x62, 0x00, 0x00, 0x02, 0x62 }; static const int32_t utf32OffsetsBack[]={ 0,0,0,0, 1,1,1,1, 2,2,2,2, 4,4,4,4, 5,5,5,5, 6,6,6,6, 7,7,7,7, 8,8,8,8 }; testConvertToU(utf32, sizeof(utf32), utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); } /* Test UTF-32LE bad data handling*/ { static const uint8_t utf32[]={ 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 0x62, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 0x62, 0x01, 0x00, 0x00, 0x62, 0x02, 0x00, 0x00, }; static const uint16_t utf32Expected[]={ 0x0061, 0xfffd, /* 0x110000 out of range */ 0xDBFF, /* 0x10FFFF in range */ 0xDFFF, 0x0062, 0xfffd, /* 0xffffffff out of range */ 0xfffd, /* 0x7fffffff out of range */ 0x0162, 0x0262 }; static const int32_t utf32Offsets[]={ 0, 4, 8, 8, 12, 16, 20, 24, 28 }; static const uint8_t utf32ExpectedBack[]={ 0x61, 0x00, 0x00, 0x00, 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 0x62, 0x00, 0x00, 0x00, 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 0x62, 0x01, 0x00, 0x00, 0x62, 0x02, 0x00, 0x00 }; static const int32_t utf32OffsetsBack[]={ 0,0,0,0, 1,1,1,1, 2,2,2,2, 4,4,4,4, 5,5,5,5, 6,6,6,6, 7,7,7,7, 8,8,8,8 }; testConvertToU(utf32, sizeof(utf32), utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); } } static void TestCoverageMBCS(){ #if 0 UErrorCode status = U_ZERO_ERROR; const char *directory = loadTestData(&status); char* tdpath = NULL; char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); int len = strlen(directory); char* index=NULL; tdpath = (char*) malloc(sizeof(char) * (len * 2)); uprv_strcpy(saveDirectory,u_getDataDirectory()); log_verbose("Retrieved data directory %s \n",saveDirectory); uprv_strcpy(tdpath,directory); index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ *(index+1)=0; } u_setDataDirectory(tdpath); log_verbose("ICU data directory is set to: %s \n" ,tdpath); #endif /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm which is test file for MBCS conversion with single-byte codepage data.*/ { /* MBCS with single byte codepage data test1.ucm*/ const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; /*from Unicode*/ testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); } /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm which is test file for MBCS conversion with three-byte codepage data.*/ { /* MBCS with three byte codepage data test3.ucm*/ const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; /*from Unicode*/ testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); /*to Unicode*/ testConvertToU(test3input, sizeof(test3input), expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); } /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm which is test file for MBCS conversion with four-byte codepage data.*/ { /* MBCS with three byte codepage data test4.ucm*/ static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; /*from Unicode*/ testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); /*to Unicode*/ testConvertToU(test4input, sizeof(test4input), expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); } #if 0 free(tdpath); /* restore the original data directory */ log_verbose("Setting the data directory to %s \n", saveDirectory); u_setDataDirectory(saveDirectory); free(saveDirectory); #endif } static void TestConverterType(const char *convName, UConverterType convType) { UConverter* myConverter; UErrorCode err = U_ZERO_ERROR; myConverter = my_ucnv_open(convName, &err); if (U_FAILURE(err)) { log_data_err("Failed to create an %s converter\n", convName); return; } else { if (ucnv_getType(myConverter)!=convType) { log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", convName, convType); } else { log_verbose("ucnv_getType %s ok\n", convName); } } ucnv_close(myConverter); } static void TestConverterTypesAndStarters() { #if !UCONFIG_NO_LEGACY_CONVERSION UConverter* myConverter; UErrorCode err = U_ZERO_ERROR; UBool mystarters[256]; /* const UBool expectedKSCstarters[256] = { FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); myConverter = ucnv_open("ksc", &err); if (U_FAILURE(err)) { log_data_err("Failed to create an ibm-ksc converter\n"); return; } else { if (ucnv_getType(myConverter)!=UCNV_MBCS) log_err("ucnv_getType Failed for ibm-949\n"); else log_verbose("ucnv_getType ibm-949 ok\n"); if(myConverter!=NULL) ucnv_getStarters(myConverter, mystarters, &err); /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) log_err("Failed ucnv_getStarters for ksc\n"); else log_verbose("ucnv_getStarters ok\n");*/ } ucnv_close(myConverter); TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); TestConverterType("ibm-878", UCNV_SBCS); #endif TestConverterType("iso-8859-1", UCNV_LATIN_1); TestConverterType("ibm-1208", UCNV_UTF8); TestConverterType("utf-8", UCNV_UTF8); TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); #if !UCONFIG_NO_LEGACY_CONVERSION #if defined(U_ENABLE_GENERIC_ISO_2022) TestConverterType("iso-2022", UCNV_ISO_2022); #endif TestConverterType("hz", UCNV_HZ); #endif TestConverterType("scsu", UCNV_SCSU); #if !UCONFIG_NO_LEGACY_CONVERSION TestConverterType("x-iscii-de", UCNV_ISCII); #endif TestConverterType("ascii", UCNV_US_ASCII); TestConverterType("utf-7", UCNV_UTF7); TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); TestConverterType("bocu-1", UCNV_BOCU1); } static void TestAmbiguousConverter(UConverter *cnv) { static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; UChar outUnicode[20]={ 0, 0, 0, 0 }; const char *s; UChar *u; UErrorCode errorCode; UBool isAmbiguous; /* try to convert an 'a', a square bracket and a US-ASCII backslash */ errorCode=U_ZERO_ERROR; s=inBytes; u=outUnicode; ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); if(U_FAILURE(errorCode)) { /* we do not care about general failures in this test; the input may just not be mappable */ return; } if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ /* There are some encodings that are partially ASCII based, like the ISO-7 and GSM series of codepages, which we ignore. */ return; } isAmbiguous=ucnv_isAmbiguous(cnv); /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ if((outUnicode[2]!=0x5c)!=isAmbiguous) { log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); return; } if(outUnicode[2]!=0x5c) { /* needs fixup, fix it */ ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); if(outUnicode[2]!=0x5c) { /* the fix failed */ log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); return; } } } static void TestAmbiguous() { UErrorCode status = U_ZERO_ERROR; UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; static const char target[] = { /* "\\usr\\local\\share\\data\\icutest.txt" */ 0x5c, 0x75, 0x73, 0x72, 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5c, 0x64, 0x61, 0x74, 0x61, 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 0 }; UChar asciiResult[200], sjisResult[200]; int32_t /*asciiLength = 0,*/ sjisLength = 0, i; const char *name; /* enumerate all converters */ status=U_ZERO_ERROR; for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { cnv=ucnv_open(name, &status); if(U_SUCCESS(status)) { TestAmbiguousConverter(cnv); ucnv_close(cnv); } else { log_err("error: unable to open available converter \"%s\"\n", name); status=U_ZERO_ERROR; } } #if !UCONFIG_NO_LEGACY_CONVERSION sjis_cnv = ucnv_open("ibm-943", &status); if (U_FAILURE(status)) { log_data_err("Failed to create a SJIS converter\n"); return; } ascii_cnv = ucnv_open("LATIN-1", &status); if (U_FAILURE(status)) { log_data_err("Failed to create a LATIN-1 converter\n"); ucnv_close(sjis_cnv); return; } /* convert target from SJIS to Unicode */ sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); if (U_FAILURE(status)) { log_err("Failed to convert the SJIS string.\n"); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; } /* convert target from Latin-1 to Unicode */ /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); if (U_FAILURE(status)) { log_err("Failed to convert the Latin-1 string.\n"); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; } if (!ucnv_isAmbiguous(sjis_cnv)) { log_err("SJIS converter should contain ambiguous character mappings.\n"); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; } if (u_strcmp(sjisResult, asciiResult) == 0) { log_err("File separators for SJIS don't need to be fixed.\n"); } ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); if (u_strcmp(sjisResult, asciiResult) != 0) { log_err("Fixing file separator for SJIS failed.\n"); } ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); #endif } static void TestSignatureDetection(){ /* with null terminated strings */ { static const char* data[] = { "\xFE\xFF\x00\x00", /* UTF-16BE */ "\xFF\xFE\x00\x00", /* UTF-16LE */ "\xEF\xBB\xBF\x00", /* UTF-8 */ "\x0E\xFE\xFF\x00", /* SCSU */ "\xFE\xFF", /* UTF-16BE */ "\xFF\xFE", /* UTF-16LE */ "\xEF\xBB\xBF", /* UTF-8 */ "\x0E\xFE\xFF", /* SCSU */ "\xFE\xFF\x41\x42", /* UTF-16BE */ "\xFF\xFE\x41\x41", /* UTF-16LE */ "\xEF\xBB\xBF\x41", /* UTF-8 */ "\x0E\xFE\xFF\x41", /* SCSU */ "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ "\x2B\x2F\x76\x38\x41", /* UTF-7 */ "\x2B\x2F\x76\x39\x41", /* UTF-7 */ "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ "\xDD\x73\x66\x73" /* UTF-EBCDIC */ }; static const char* expected[] = { "UTF-16BE", "UTF-16LE", "UTF-8", "SCSU", "UTF-16BE", "UTF-16LE", "UTF-8", "SCSU", "UTF-16BE", "UTF-16LE", "UTF-8", "SCSU", "UTF-7", "UTF-7", "UTF-7", "UTF-7", "UTF-7", "UTF-EBCDIC" }; static const int32_t expectedLength[] ={ 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 5, 4, 4, 4, 4, 4 }; int i=0; UErrorCode err; int32_t signatureLength = -1; const char* source = NULL; const char* enc = NULL; for( ; i */ 2, 0x2d, 1, 0x21, 2, 0x2b, 7, 0x10401 }; const char *cnvName; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-7", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ return; } TestNextUChar(cnv, source, limit, results, "UTF-7"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); cnvName = ucnv_getName(cnv, &errorCode); if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); } ucnv_close(cnv); } static void TestIMAP() { /* test input */ static const uint8_t in[]={ /* H - &Jjo- - ! &- &2AHcAQ- \ */ 0x48, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 0x26, 0x2d, 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x48, 1, 0x2d, 4, 0x263a, /* */ 2, 0x2d, 1, 0x21, 2, 0x26, 7, 0x10401 }; const char *cnvName; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ return; } TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); cnvName = ucnv_getName(cnv, &errorCode); if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); } ucnv_close(cnv); } static void TestUTF8() { /* test input */ static const uint8_t in[]={ 0x61, 0xc2, 0x80, 0xe0, 0xa0, 0x80, 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x84, 0x8c, 0xa1, 0xf0, 0x90, 0x90, 0x81 }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 2, 0x80, 3, 0x800, 4, 0x10000, 4, 0x104321, 4, 0x10401 }; /* error test input */ static const uint8_t in2[]={ 0x61, 0xc0, 0x80, /* illegal non-shortest form */ 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 0xc0, 0xc0, /* illegal trail byte */ 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 0xfe, /* illegal byte altogether */ 0x62 }; /* expected error test results */ static const int32_t results2[]={ /* number of bytes read, code point */ 1, 0x61, 22, 0x62 }; UConverterToUCallback cb; const void *p; const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-8", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-8"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /* test error behavior with a skip callback */ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); source=(const char *)in2; limit=(const char *)(in2+sizeof(in2)); TestNextUChar(cnv, source, limit, results2, "UTF-8"); ucnv_close(cnv); } static void TestCESU8() { /* test input */ static const uint8_t in[]={ 0x61, 0xc2, 0x80, 0xe0, 0xa0, 0x80, 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 0xef, 0xbf, 0xbc }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 2, 0x80, 3, 0x800, 6, 0x10000, 3, 0xdc01, -1,0xd802, /* may read 3 or 6 bytes */ -1,0x10ffff,/* may read 0 or 3 bytes */ 3, 0xfffc }; /* error test input */ static const uint8_t in2[]={ 0x61, 0xc0, 0x80, /* illegal non-shortest form */ 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 0xc0, 0xc0, /* illegal trail byte */ 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 0xfe, /* illegal byte altogether */ 0x62 }; /* expected error test results */ static const int32_t results2[]={ /* number of bytes read, code point */ 1, 0x61, 34, 0x62 }; UConverterToUCallback cb; const void *p; const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("CESU-8", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "CESU-8"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /* test error behavior with a skip callback */ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); source=(const char *)in2; limit=(const char *)(in2+sizeof(in2)); TestNextUChar(cnv, source, limit, results2, "CESU-8"); ucnv_close(cnv); } static void TestUTF16() { /* test input */ static const uint8_t in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }; static const uint8_t in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }; static const uint8_t in3[]={ 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 }; /* expected test results */ static const int32_t results1[]={ /* number of bytes read, code point */ 4, 0x4e00, 2, 0xfeff }; static const int32_t results2[]={ /* number of bytes read, code point */ 4, 0x004e, 2, 0xfffe }; static const int32_t results3[]={ /* number of bytes read, code point */ 2, 0xfefe, 2, 0x4e00, 2, 0xfeff, 4, 0x20001 }; const char *source, *limit; UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-16", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); return; } source=(const char *)in1, limit=(const char *)in1+sizeof(in1); TestNextUChar(cnv, source, limit, results1, "UTF-16"); source=(const char *)in2, limit=(const char *)in2+sizeof(in2); ucnv_resetToUnicode(cnv); TestNextUChar(cnv, source, limit, results2, "UTF-16"); source=(const char *)in3, limit=(const char *)in3+sizeof(in3); ucnv_resetToUnicode(cnv); TestNextUChar(cnv, source, limit, results3, "UTF-16"); /* Test the condition when source >= sourceLimit */ ucnv_resetToUnicode(cnv); TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); ucnv_close(cnv); } static void TestUTF16BE() { /* test input */ static const uint8_t in[]={ 0x00, 0x61, 0x00, 0xc0, 0x00, 0x31, 0x00, 0xf4, 0xce, 0xfe, 0xd8, 0x01, 0xdc, 0x01 }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 2, 0x61, 2, 0xc0, 2, 0x31, 2, 0xf4, 2, 0xcefe, 4, 0x10401 }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("utf-16be", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-16BE"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0x61}; ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); } #if 0 /* * Test disabled because currently the UTF-16BE/LE converters are supposed * to not set errors for unpaired surrogates. * This may change with * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 */ /*Test for the condition where there is a surrogate pair*/ { const uint8_t source2[]={0xd8, 0x01}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); } #endif ucnv_close(cnv); } static void TestUTF16LE() { /* test input */ static const uint8_t in[]={ 0x61, 0x00, 0x31, 0x00, 0x4e, 0x2e, 0x4e, 0x00, 0x01, 0xd8, 0x01, 0xdc }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 2, 0x61, 2, 0x31, 2, 0x2e4e, 2, 0x4e, 4, 0x10401 }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("utf-16le", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-16LE"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0x61}; ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); } #if 0 /* * Test disabled because currently the UTF-16BE/LE converters are supposed * to not set errors for unpaired surrogates. * This may change with * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 */ /*Test for the condition where there is a surrogate character*/ { static const uint8_t source2[]={0x01, 0xd8}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); } #endif ucnv_close(cnv); } static void TestUTF32() { /* test input */ static const uint8_t in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }; static const uint8_t in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }; static const uint8_t in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }; /* expected test results */ static const int32_t results1[]={ /* number of bytes read, code point */ 8, 0x100f00, 4, 0xfeff }; static const int32_t results2[]={ /* number of bytes read, code point */ 8, 0x0f1000, 4, 0xfffe }; static const int32_t results3[]={ /* number of bytes read, code point */ 4, 0xfefe, 4, 0x100f00, 4, 0xfffd, /* unmatched surrogate */ 4, 0xfffd /* unmatched surrogate */ }; const char *source, *limit; UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-32", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); return; } source=(const char *)in1, limit=(const char *)in1+sizeof(in1); TestNextUChar(cnv, source, limit, results1, "UTF-32"); source=(const char *)in2, limit=(const char *)in2+sizeof(in2); ucnv_resetToUnicode(cnv); TestNextUChar(cnv, source, limit, results2, "UTF-32"); source=(const char *)in3, limit=(const char *)in3+sizeof(in3); ucnv_resetToUnicode(cnv); TestNextUChar(cnv, source, limit, results3, "UTF-32"); /* Test the condition when source >= sourceLimit */ ucnv_resetToUnicode(cnv); TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); ucnv_close(cnv); } static void TestUTF32BE() { /* test input */ static const uint8_t in[]={ 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x30, 0x61, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00, 0xdf, 0xff, 0x00, 0x00, 0xff, 0xfe, 0x00, 0x10, 0xab, 0xcd, 0x00, 0x10, 0xff, 0xff }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 4, 0x61, 4, 0x3061, 4, 0xfffd, 4, 0xfffd, 4, 0xfffd, 4, 0xfffe, 4, 0x10abcd, 4, 0x10ffff }; /* error test input */ static const uint8_t in2[]={ 0x00, 0x00, 0x00, 0x61, 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 0x00, 0x00, 0x00, 0x62, 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 0x00, 0x00, 0x01, 0x62, 0x00, 0x00, 0x02, 0x62 }; /* expected error test results */ static const int32_t results2[]={ /* number of bytes read, code point */ 4, 0x61, 8, 0x62, 12, 0x162, 4, 0x262 }; UConverterToUCallback cb; const void *p; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-32BE"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /* test error behavior with a skip callback */ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); source=(const char *)in2; limit=(const char *)(in2+sizeof(in2)); TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); ucnv_close(cnv); } static void TestUTF32LE() { /* test input */ static const uint8_t in[]={ 0x61, 0x00, 0x00, 0x00, 0x61, 0x30, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0xff, 0xdf, 0x00, 0x00, 0xfe, 0xff, 0x00, 0x00, 0xcd, 0xab, 0x10, 0x00, 0xff, 0xff, 0x10, 0x00 }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 4, 0x61, 4, 0x3061, 4, 0xfffd, 4, 0xfffd, 4, 0xfffd, 4, 0xfffe, 4, 0x10abcd, 4, 0x10ffff }; /* error test input */ static const uint8_t in2[]={ 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 0x62, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 0x62, 0x01, 0x00, 0x00, 0x62, 0x02, 0x00, 0x00, }; /* expected error test results */ static const int32_t results2[]={ /* number of bytes read, code point */ 4, 0x61, 8, 0x62, 12, 0x162, 4, 0x262, }; UConverterToUCallback cb; const void *p; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-32LE"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /* test error behavior with a skip callback */ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); source=(const char *)in2; limit=(const char *)(in2+sizeof(in2)); TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); ucnv_close(cnv); } static void TestLATIN1() { /* test input */ static const uint8_t in[]={ 0x61, 0x31, 0x32, 0xc0, 0xf0, 0xf4, }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 1, 0x31, 1, 0x32, 1, 0xc0, 1, 0xf0, 1, 0xf4, }; static const uint16_t in1[] = { 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 0xcb, 0x82 }; static const uint8_t out1[] = { 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 0xcb, 0x82 }; static const uint16_t in2[]={ 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x37, 0x20, 0x2A, 0x2F, }; static const unsigned char out2[]={ 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x37, 0x20, 0x2A, 0x2F, }; const char *source=(const char *)in; const char *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("LATIN_1", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "LATIN_1"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); ucnv_close(cnv); } static void TestSBCS() { /* test input */ static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 1, 0xbf, 1, 0xc4, 1, 0x2021, 1, 0xf8ff, 1, 0x00d9 }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for Illegal character */ /* { static const uint8_t input1[]={ 0xA1 }; const char* illegalsource=(const char*)input1; TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); } */ ucnv_close(cnv); } static void TestDBCS() { /* test input */ static const uint8_t in[]={ 0x44, 0x6a, 0xc4, 0x9c, 0x7a, 0x74, 0x46, 0xab, 0x42, 0x5b, }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 2, 0x00a7, 2, 0xe1d2, 2, 0x6962, 2, 0xf842, 2, 0xffe5, }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0x1a, 0x1b}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); } /*Test for the condition where we have a truncated char*/ { static const uint8_t source1[]={0xc4}; ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); } ucnv_close(cnv); } static void TestMBCS() { /* test input */ static const uint8_t in[]={ 0x01, 0xa6, 0xa3, 0x00, 0xa6, 0xa1, 0x08, 0xc2, 0x76, 0xc2, 0x78, }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x0001, 2, 0x250c, 1, 0x0000, 2, 0x2500, 1, 0x0008, 2, 0xd60c, 2, 0xd60e, }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("ibm-1363", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0xa1, 0x80}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); } /*Test for the condition where we have a truncated char*/ { static const uint8_t source1[]={0xc4}; ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); } ucnv_close(cnv); } #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO static void TestICCRunout() { /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ const char *cnvName = "ibm-1363"; UErrorCode status = U_ZERO_ERROR; const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; /* UChar expectUData[] = { 0x00a1, 0x001a }; */ const char *source = sourceData; const char *sourceLim = sourceData+sizeof(sourceData); UChar c1, c2, c3; UConverter *cnv=ucnv_open(cnvName, &status); if(U_FAILURE(status)) { log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); return; } #if 0 { UChar targetBuf[256]; UChar *target = targetBuf; UChar *targetLim = target+256; ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); log_info("After convert: target@%d, source@%d, status%s\n", target-targetBuf, source-sourceData, u_errorName(status)); if(U_FAILURE(status)) { log_err("Failed to convert: %s\n", u_errorName(status)); } else { } } #endif c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { log_verbose("OK\n"); } else { log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); } ucnv_close(cnv); } #endif #ifdef U_ENABLE_GENERIC_ISO_2022 static void TestISO_2022() { /* test input */ static const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc2, 0x80, 0xe0, 0xa0, 0x80, 0xf0, 0x90, 0x80, 0x80 }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 4, 0x0031, /* 4 bytes including the escape sequence */ 1, 0x0032, 1, 0x61, 2, 0x80, 3, 0x800, 4, 0x10000 }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv; cnv=ucnv_open("ISO_2022", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "ISO_2022"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where we have a truncated char*/ { static const uint8_t source1[]={0xc4}; ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); } /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0xa1, 0x01}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); } ucnv_close(cnv); } #endif static void TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ const UChar* uSource; const UChar* uSourceLimit; const char* cSource; const char* cSourceLimit; UChar *uTargetLimit =NULL; UChar *uTarget; char *cTarget; const char *cTargetLimit; char *cBuf; UChar *uBuf; /*,*test;*/ int32_t uBufSize = 120; int len=0; int i=2; UErrorCode errorCode=U_ZERO_ERROR; uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); ucnv_reset(cnv); for(;--i>0; ){ uSource = (UChar*) source; uSourceLimit=(const UChar*)sourceLimit; cTarget = cBuf; uTarget = uBuf; cSource = cBuf; cTargetLimit = cBuf; uTargetLimit = uBuf; do{ cTargetLimit = cTargetLimit+ i; ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); if(errorCode==U_BUFFER_OVERFLOW_ERROR){ errorCode=U_ZERO_ERROR; continue; } if(U_FAILURE(errorCode)){ log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); return; } }while (uSource0;){ uSource = (UChar*) source; cTarget = cBuf; uTarget = uBuf; cSource = cBuf; cTargetLimit = cBuf; uTargetLimit = uBuf+uBufSize*5; cTargetLimit = cTargetLimit+uBufSize*10; uSourceLimit=uSource; do{ if (uSourceLimit < sourceLimit) { uSourceLimit = uSourceLimit+1; } ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); if(errorCode==U_BUFFER_OVERFLOW_ERROR){ errorCode=U_ZERO_ERROR; continue; } if(U_FAILURE(errorCode)){ log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); return; } }while (uSource0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } if(srcLen==-1){ srcLen = (int32_t)uprv_strlen(src); } for (; srcIndex0xFFFF){ dst[dstIndex++] = U16_LEAD(c); if(dstIndex&@*/ }; static const uint16_t expectedISO2022JIS[] = { 0x0041, 0x0042, 0xFF81, 0xFF82, 0x3000 }; static const int32_t toISO2022JISOffs[]={ 3,4, 8,9, 16 }; static const uint8_t sampleTextJIS7[] = { 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 0x1b,0x24,0x42,0x21,0x21, 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 0x21,0x22, 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore &@*/ }; static const uint16_t expectedISO2022JIS7[] = { 0x0041, 0x0042, 0xFF81, 0xFF82, 0x3000, 0xFF81, 0xFF82, 0x3001, 0x3000 }; static const int32_t toISO2022JIS7Offs[]={ 3,4, 8,9, 13,16, 17, 19,27 }; static const uint8_t sampleTextJIS8[] = { 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 0xa1,0xc8,0xd9,/*Katakana Set*/ 0x1b,0x28,0x42, 0x41,0x42, 0xb1,0xc3, /*Katakana Set*/ 0x1b,0x24,0x42,0x21,0x21 }; static const uint16_t expectedISO2022JIS8[] = { 0x0041, 0x0042, 0xff61, 0xff88, 0xff99, 0x0041, 0x0042, 0xff71, 0xff83, 0x3000 }; static const int32_t toISO2022JIS8Offs[]={ 3, 4, 5, 6, 7, 11, 12, 13, 14, 18, }; testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); } } #if 0 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 static void TestJitterbug915(){ /* tests for roundtripping of the below sequence \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / */ static const char cSource[]={ 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x37, 0x20, 0x2A, 0x2F }; UChar uTarget[500]={'\0'}; UChar* utarget=uTarget; UChar* utargetLimit=uTarget+sizeof(uTarget)/2; char cTarget[500]={'\0'}; char* ctarget=cTarget; char* ctargetLimit=cTarget+sizeof(cTarget); const char* csource=cSource; const char* tempSrc = cSource; UErrorCode err=U_ZERO_ERROR; UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); if(U_FAILURE(err)) { log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); return; } ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); if(U_FAILURE(err)) { log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); return; } utargetLimit=utarget; utarget = uTarget; ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); if(U_FAILURE(err)) { log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); return; } ctargetLimit=ctarget; ctarget =cTarget; while(ctarget UCNV_IRREGULAR) { return; } if (reason != UCNV_IRREGULAR) { log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); } /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ *err = U_ZERO_ERROR; ucnv_cbToUWriteSub(toArgs,0,err); } enum { kEmptySegmentToUCharsMax = 64 }; static void TestJitterbug6175(void) { static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; static const EmptySegmentTest emptySegmentTests[] = { /* converterName inputText inputTextLength */ { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, /* terminator: */ { NULL, NULL, 0, } }; const EmptySegmentTest * testPtr; for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { UErrorCode err = U_ZERO_ERROR; UConverter * cnv = ucnv_open(testPtr->converterName, &err); if (U_FAILURE(err)) { log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); return; } ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); if (U_FAILURE(err)) { log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); ucnv_close(cnv); return; } { UChar toUChars[kEmptySegmentToUCharsMax]; UChar * toUCharsPtr = toUChars; const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; const char * inCharsPtr = testPtr->inputText; const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); } ucnv_close(cnv); } } static void TestEBCDIC_STATEFUL() { /* test input */ static const uint8_t in[]={ 0x61, 0x1a, 0x0f, 0x4b, 0x42, 0x40, 0x36, }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x002f, 1, 0x0092, 2, 0x002e, 1, 0xff62, 1, 0x0020, 1, 0x0096, }; static const uint8_t in2[]={ 0x0f, 0xa1, 0x01 }; /* expected test results */ static const int32_t results2[]={ /* number of bytes read, code point */ 2, 0x203E, 1, 0x0001, }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("ibm-930", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); ucnv_reset(cnv); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); ucnv_reset(cnv); /*Test for the condition where source > sourcelimit after consuming the shift chracter */ { static const uint8_t source1[]={0x0f}; TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); } /*Test for the condition where there is an invalid character*/ ucnv_reset(cnv); { static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); } ucnv_reset(cnv); source=(const char*)in2; limit=(const char*)in2+sizeof(in2); TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); ucnv_close(cnv); } static void TestGB18030() { /* test input */ static const uint8_t in[]={ 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x35 #if 0 /* * Feature removed markus 2000-oct-26 * Only some codepages must match surrogate pairs into supplementary code points - * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . * GB 18030 provides direct encodings for supplementary code points, therefore * it must not combine two single-encoded surrogates into one code point. */ 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ #endif }; /* expected test results */ static const int32_t results[]={ /* number of bytes read, code point */ 1, 0x24, 1, 0x7f, 4, 0x80, 2, 0x1f9, 2, 0x20ac, 2, 0x4e00, 4, 0x9fa6, 4, 0xffff, 4, 0x10000, 4, 0x10ffff #if 0 /* Feature removed. See comment above. */ 8, 0x10000 #endif }; /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("gb18030", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); ucnv_close(cnv); } static void TestLMBCS() { /* LMBCS-1 string */ static const uint8_t pszLMBCS[]={ 0x61, 0x01, 0x29, 0x81, 0xA0, 0x0F, 0x27, 0x0F, 0x91, 0x14, 0x0a, 0x74, 0x14, 0xF6, 0x02, 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 0x10, 0x88, 0xA0, }; /* Unicode UChar32 equivalents */ static const UChar32 pszUnicode32[]={ /* code point */ 0x00000061, 0x00002013, 0x000000FC, 0x000000E1, 0x00000007, 0x00000091, 0x00000a74, 0x00000200, 0x00023456, /* code point for surrogate pair */ 0x00005516 }; /* Unicode UChar equivalents */ static const UChar pszUnicode[]={ /* code point */ 0x0061, 0x2013, 0x00FC, 0x00E1, 0x0007, 0x0091, 0x0a74, 0x0200, 0xD84D, /* low surrogate */ 0xDC56, /* high surrogate */ 0x5516 }; /* expected test results */ static const int offsets32[]={ /* number of bytes read, code point */ 0, 1, 3, 4, 5, 7, 9, 12, 15, 21, 24 }; /* expected test results */ static const int offsets[]={ /* number of bytes read, code point */ 0, 1, 3, 4, 5, 7, 9, 12, 15, 18, 21, 24 }; UConverter *cnv; #define NAME_LMBCS_1 "LMBCS-1" #define NAME_LMBCS_2 "LMBCS-2" /* Some basic open/close/property tests on some LMBCS converters */ { char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ char get_subchars [1]; const char * get_name; UConverter *cnv1; UConverter *cnv2; int8_t len = sizeof(get_subchars); UErrorCode errorCode=U_ZERO_ERROR; /* Open */ cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); return; } cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); return; } /* Name */ get_name = ucnv_getName (cnv1, &errorCode); if (strcmp(NAME_LMBCS_1,get_name)){ log_err("Unexpected converter name: %s\n", get_name); } get_name = ucnv_getName (cnv2, &errorCode); if (strcmp(NAME_LMBCS_2,get_name)){ log_err("Unexpected converter name: %s\n", get_name); } /* substitution chars */ ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); if(U_FAILURE(errorCode)) { log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); } if (len!=1){ log_err("Unexpected length of sub chars\n"); } if (get_subchars[0] != expected_subchars[0]){ log_err("Unexpected value of sub chars\n"); } ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); if(U_FAILURE(errorCode)) { log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); } ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); if(U_FAILURE(errorCode)) { log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); } if (len!=1){ log_err("Unexpected length of sub chars\n"); } if (get_subchars[0] != new_subchars[0]){ log_err("Unexpected value of sub chars\n"); } ucnv_close(cnv1); ucnv_close(cnv2); } /* LMBCS to Unicode - offsets */ { UErrorCode errorCode=U_ZERO_ERROR; const char * pSource = (const char *)pszLMBCS; const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); UChar Out [sizeof(pszUnicode) + 1]; UChar * pOut = Out; UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); int32_t off [sizeof(offsets)]; /* last 'offset' in expected results is just the final size. (Makes other tests easier). Compensate here: */ off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ if(U_FAILURE(errorCode)) { log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); return; } ucnv_toUnicode (cnv, &pOut, OutLimit, &pSource, sourceLimit, off, TRUE, &errorCode); if (memcmp(off,offsets,sizeof(offsets))) { log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); } if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) { log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); } ucnv_close(cnv); } { /* LMBCS to Unicode - getNextUChar */ const char * sourceStart; const char *source=(const char *)pszLMBCS; const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); const UChar32 *results= pszUnicode32; const int *off = offsets32; UErrorCode errorCode=U_ZERO_ERROR; UChar32 uniChar; cnv=ucnv_open("LMBCS-1", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); return; } else { while(source Unicode */ UErrorCode errorCode=U_ZERO_ERROR; const char * pSource = (const char *)pszLMBCS; const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); int codepointCount = 0; UChar Out [sizeof(pszUnicode) + 1]; UChar * pOut = Out; UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); cnv = ucnv_open(NAME_LMBCS_1, &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); return; } while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) { ucnv_toUnicode (cnv, &pOut, OutLimit, &pSource, (pSource+1), /* claim that this is a 1- byte buffer */ NULL, FALSE, /* FALSE means there might be more chars in the next buffer */ &errorCode); if (U_SUCCESS (errorCode)) { if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) { /* we are on to the next code point: check value */ if (Out[0] != pszUnicode[codepointCount]){ log_err("LMBCS->Uni result %lx should have been %lx \n", Out[0], pszUnicode[codepointCount]); } pOut = Out; /* reset for accumulating next code point */ codepointCount++; } } else { log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); } } { /* limits & surrogate error testing */ char LIn [sizeof(pszLMBCS)]; const char * pLIn = LIn; char LOut [sizeof(pszLMBCS)]; char * pLOut = LOut; UChar UOut [sizeof(pszUnicode)]; UChar * pUOut = UOut; UChar UIn [sizeof(pszUnicode)]; const UChar * pUIn = UIn; int32_t off [sizeof(offsets)]; UChar32 uniChar; errorCode=U_ZERO_ERROR; /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ pUIn++; ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); } pUIn--; errorCode=U_ZERO_ERROR; ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; /* 0 byte source request - no error, no pointer movement */ ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); if(U_FAILURE(errorCode)) { log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); } if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) { log_err("Unexpected pointer move in 0 byte source request \n"); } /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) { log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); } if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ { log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); } errorCode = U_ZERO_ERROR; /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ pUIn = pszUnicode; ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) { log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); } errorCode = U_ZERO_ERROR; pLIn = (const char *)pszLMBCS; ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) { log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); } /* unpaired or chopped LMBCS surrogates */ /* OK high surrogate, Low surrogate is chopped */ LIn [0] = (char)0x14; LIn [1] = (char)0xD8; LIn [2] = (char)0x01; LIn [3] = (char)0x14; LIn [4] = (char)0xDC; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) { log_err("Unexpected results on chopped low surrogate\n"); } /* chopped at surrogate boundary */ LIn [0] = (char)0x14; LIn [1] = (char)0xD8; LIn [2] = (char)0x01; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) { log_err("Unexpected results on chopped at surrogate boundary \n"); } /* unpaired surrogate plus valid Unichar */ LIn [0] = (char)0x14; LIn [1] = (char)0xD8; LIn [2] = (char)0x01; LIn [3] = (char)0x14; LIn [4] = (char)0xC9; LIn [5] = (char)0xD0; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) { log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); } /* unpaired surrogate plus chopped Unichar */ LIn [0] = (char)0x14; LIn [1] = (char)0xD8; LIn [2] = (char)0x01; LIn [3] = (char)0x14; LIn [4] = (char)0xC9; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) { log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); } /* unpaired surrogate plus valid non-Unichar */ LIn [0] = (char)0x14; LIn [1] = (char)0xD8; LIn [2] = (char)0x01; LIn [3] = (char)0x0F; LIn [4] = (char)0x3B; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) { log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); } /* unpaired surrogate plus chopped non-Unichar */ LIn [0] = (char)0x14; LIn [1] = (char)0xD8; LIn [2] = (char)0x01; LIn [3] = (char)0x0F; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) { log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); } } } ucnv_close(cnv); /* final cleanup */ } static void TestJitterbug255() { static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; const char *testBuffer = (const char *)testBytes; const char *testEnd = (const char *)testBytes + sizeof(testBytes); UErrorCode status = U_ZERO_ERROR; /*UChar32 result;*/ UConverter *cnv = 0; cnv = ucnv_open("shift-jis", &status); if (U_FAILURE(status) || cnv == 0) { log_data_err("Failed to open the converter for SJIS.\n"); return; } while (testBuffer != testEnd) { /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); if (U_FAILURE(status)) { log_err("Failed to convert the next UChar for SJIS.\n"); break; } } ucnv_close(cnv); } static void TestEBCDICUS4XML() { UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; char target_x[] = {0x00, 0x00, 0x00, 0x00}; UChar *unicodes = unicodes_x; const UChar *toUnicodeMaps = toUnicodeMaps_x; char *target = target_x; const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; UErrorCode status = U_ZERO_ERROR; UConverter *cnv = 0; cnv = ucnv_open("ebcdic-xml-us", &status); if (U_FAILURE(status) || cnv == 0) { log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); return; } ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", u_errorName(status)); printUSeqErr(unicodes_x, 3); printUSeqErr(toUnicodeMaps, 3); } status = U_ZERO_ERROR; ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", u_errorName(status)); printSeqErr((const unsigned char*)target_x, 3); printSeqErr((const unsigned char*)fromUnicodeMaps, 3); } ucnv_close(cnv); } #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ #if !UCONFIG_NO_COLLATION static void TestJitterbug981(){ const UChar* rules; int32_t rules_length, target_cap, bytes_needed, buff_size; UErrorCode status = U_ZERO_ERROR; UConverter *utf8cnv; UCollator* myCollator; char *buff; int numNeeded=0; utf8cnv = ucnv_open ("utf8", &status); if(U_FAILURE(status)){ log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); return; } myCollator = ucol_open("zh", &status); if(U_FAILURE(status)){ log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); ucnv_close(utf8cnv); return; } rules = ucol_getRules(myCollator, &rules_length); buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); buff = malloc(buff_size); target_cap = 0; do { ucnv_reset(utf8cnv); status = U_ZERO_ERROR; if(target_cap >= buff_size) { log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); break; } bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, rules, rules_length, &status); target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; if(numNeeded!=0 && numNeeded!= bytes_needed){ log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); break; } numNeeded = bytes_needed; } while (status == U_BUFFER_OVERFLOW_ERROR); ucol_close(myCollator); ucnv_close(utf8cnv); free(buff); } #endif static void TestJitterbug1293(){ static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; char target[256]; UErrorCode status = U_ZERO_ERROR; UConverter* conv=NULL; int32_t target_cap, bytes_needed, numNeeded = 0; conv = ucnv_open("shift-jis",&status); if(U_FAILURE(status)){ log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); return; } do{ target_cap =0; bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; if(numNeeded!=0 && numNeeded!= bytes_needed){ log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); } numNeeded = bytes_needed; } while (status == U_BUFFER_OVERFLOW_ERROR); if(U_FAILURE(status)){ log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); return; } ucnv_close(conv); } static void TestJB5275_1(){ static const char* data = "\x3B\xB3\x0A" /* Easy characters */ "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ /* Switch script: */ "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ "\xEF\x40\x3B\xB3\x0A"; static const UChar expected[] ={ 0x003b, 0x0a15, 0x000a, /* Easy characters */ 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ }; UErrorCode status = U_ZERO_ERROR; UConverter* conv = ucnv_open("iscii-gur", &status); UChar dest[100] = {'\0'}; UChar* target = dest; UChar* targetLimit = dest+100; const char* source = data; const char* sourceLimit = data+strlen(data); const UChar* exp = expected; if (U_FAILURE(status)) { log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); return; } log_verbose("Testing switching back to default script when new line is encountered.\n"); ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); if(U_FAILURE(status)){ log_err("conversion failed: %s \n", u_errorName(status)); } targetLimit = target; target = dest; printUSeq(target, targetLimit-target); while(target