/******************************************************************** * COPYRIGHT: * Copyright (c) 1997-2001, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /******************************************************************************** * * File CCONVTST.C * * Modification History: * Name Description * Steven R. Loomis 7/8/1999 Adding input buffer test ********************************************************************************* */ #include #include "cstring.h" #include "unicode/uloc.h" #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" #include "cintltst.h" #include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/ucol.h" #include "cmemory.h" static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint32_t results[], const char* message); static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); #if !UCONFIG_NO_COLLATION static void TestJitterbug981(void); #endif static void TestJitterbug1293(void); static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; static void TestConverterTypesAndStarters(void); static void TestAmbiguous(void); static void TestSignatureDetection(void); static void TestUTF7(void); static void TestIMAP(void); static void TestUTF8(void); static void TestCESU8(void); static void TestUTF16(void); static void TestUTF16BE(void); static void TestUTF16LE(void); static void TestUTF32(void); static void TestUTF32BE(void); static void TestUTF32LE(void); static void TestLATIN1(void); static void TestSBCS(void); static void TestDBCS(void); static void TestMBCS(void); static void TestISO_2022(void); static void TestISO_2022_JP(void); static void TestISO_2022_JP_1(void); static void TestISO_2022_JP_2(void); static void TestISO_2022_KR(void); static void TestISO_2022_KR_1(void); static void TestISO_2022_CN(void); static void TestISO_2022_CN_EXT(void); static void TestJIS(void); static void TestHZ(void); static void TestSCSU(void); static void TestEBCDIC_STATEFUL(void); static void TestGB18030(void); static void TestLMBCS(void); static void TestJitterbug255(void); static void TestJitterbug792(void); static void TestEBCDICUS4XML(void); static void TestJitterbug915(void); static void TestISCII(void); static void TestConv(const uint16_t in[], int len, const char* conv, const char* lang, char byteArr[], int byteArrLen); static void TestRoundTrippingAllUTF(void); static void TestCoverageMBCS(void); static void TestJitterbug2346(void); static void TestJitterbug2411(void); void addTestNewConvert(TestNode** root); /* open a converter, using test data if it begins with '@' */ static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); #define NEW_MAX_BUFFER 999 static int32_t gInBufferSize = NEW_MAX_BUFFER; static int32_t gOutBufferSize = NEW_MAX_BUFFER; static char gNuConvTestName[1024]; #define nct_min(x,y) ((x %d chars out]. \nResult :", sourceLen, targ-junkout); if(VERBOSITY) { char junk[9999]; char offset_str[9999]; uint8_t *ptr; junk[0] = 0; offset_str[0] = 0; for(ptr = junkout;ptr%s\n", gNuConvTestName); printUSeqErr(source, sourceLen); printf("\nGot:"); printSeqErr((const unsigned char *)junkout, expectLen); printf("\nExpected:"); printSeqErr((const unsigned char *)expect, expectLen); return TC_MISMATCH; } } /* Note: This function uses global variables and it will not do offset checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, const char *codepage, const int32_t *expectOffsets, UBool useFallback) { UErrorCode status = U_ZERO_ERROR; UConverter *conv = 0; UChar junkout[NEW_MAX_BUFFER]; /* FIX */ int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ const uint8_t *src; const uint8_t *realSourceEnd; const uint8_t *srcLimit; UChar *p; UChar *targ; UChar *end; int32_t *offs; int i; UBool checkOffsets = TRUE; int32_t realBufferSize; UChar *realBufferEnd; for(i=0;i %d chars.\nResult :", sourcelen, targ-junkout); if(VERBOSITY) { char junk[9999]; char offset_str[9999]; UChar *ptr; junk[0] = 0; offset_str[0] = 0; for(ptr = junkout;ptr h1 h2 h3 . EBCDIC_STATEFUL */ const uint8_t expectedIBM930[] = { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B }; int32_t toIBM930Offs[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, }; int32_t fmIBM930Offs[] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c}; /* 1 2 3 0 h1 h2 h3 . MBCS*/ const uint8_t expectedIBM943[] = { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e }; int32_t toIBM943Offs [] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07 }; int32_t fmIBM943Offs[] = { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a}; /* 1 2 3 0 h1 h2 h3 . DBCS*/ const uint8_t expectedIBM9027[] = { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe}; int32_t toIBM9027Offs [] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07}; /* 1 2 3 0 . SBCS*/ const uint8_t expectedIBM920[] = { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e }; int32_t toIBM920Offs [] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 }; /* 1 2 3 0 . SBCS*/ const uint8_t expectedISO88593[] = { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E }; int32_t toISO88593Offs[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; /* 1 2 3 0 . LATIN_1*/ const uint8_t expectedLATIN1[] = { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E }; int32_t toLATIN1Offs[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; /* etc */ const uint8_t expectedUTF16BE[] = { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e }; int32_t toUTF16BEOffs[]= { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07}; int32_t fmUTF16BEOffs[] = { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e }; const uint8_t expectedUTF16LE[] = { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00 }; int32_t toUTF16LEOffs[]= { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07}; int32_t fmUTF16LEOffs[] = { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e }; const uint8_t expectedUTF32BE[] = { 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x00, 0x00, 0x4e, 0x09, 0x00, 0x00, 0x00, 0x2e }; int32_t toUTF32BEOffs[]= { 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; int32_t fmUTF32BEOffs[] = { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c }; const uint8_t expectedUTF32LE[] = { 0x31, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x8c, 0x4e, 0x00, 0x00, 0x09, 0x4e, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00 }; int32_t toUTF32LEOffs[]= { 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; int32_t fmUTF32LEOffs[] = { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c }; /** Test chars #2 **/ /* Sahha [health], slashed h's */ const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; /* LMBCS */ const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666 }; const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04 }; int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 }; int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006}; /*********************************** START OF CODE finally *************/ gInBufferSize = insize; gOutBufferSize = outsize; log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); #if 1 /*UTF-8*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); log_verbose("Test surrogate behaviour for UTF8\n"); { const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 0xf0, 0x90, 0x90, 0x81, 0xef, 0xbf, 0xbd }; int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); } /*ISO-2022*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); /*UTF16 LE*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); /*UTF16 BE*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); /*UTF32 LE*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); /*UTF32 BE*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); /*LATIN_1*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); /*EBCDIC_STATEFUL*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); /*MBCS*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); /*DBCS*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM9027, sizeof(expectedIBM9027), "ibm-9027", toIBM9027Offs,FALSE ); /*SBCS*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); /*SBCS*/ testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); /****/ #endif #if 1 /*UTF-8*/ testConvertToU(expectedUTF8, sizeof(expectedUTF8), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); /*ISO-2022*/ testConvertToU(expectedISO2022, sizeof(expectedISO2022), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); /*UTF16 LE*/ testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); /*UTF16 BE*/ testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); /*UTF32 LE*/ testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); /*UTF32 BE*/ testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); /*EBCDIC_STATEFUL*/ testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs,FALSE); /*MBCS*/ testConvertToU(expectedIBM943, sizeof(expectedIBM943), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs,FALSE); /* Try it again to make sure it still works */ testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); /*LMBCS*/ testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ { /* encode directly set D and set O */ static const uint8_t utf7[] = { /* Hi Mom -+Jjo--! A+ImIDkQ. +- +ZeVnLIqe */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 0x2b, 0x2d, 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 }; static const UChar unicode[] = { /* Hi Mom --! A. + [Japanese word "nihongo"] */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 0x41, 0x2262, 0x0391, 0x2e, 0x2b, 0x65e5, 0x672c, 0x8a9e }; static const int32_t toUnicodeOffsets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 15, 17, 19, 23, 24, 27, 29, 32 }; static const int32_t fromUnicodeOffsets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 13, 13, 14, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18 }; /* same but escaping set O (the exclamation mark) */ static const uint8_t utf7Restricted[] = { /* Hi Mom -+Jjo--+ACE- A+ImIDkQ. +- +ZeVnLIqe */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 0x2b, 0x2d, 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 }; static const int32_t toUnicodeOffsetsR[] = { 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 19, 21, 23, 27, 28, 31, 33, 36 }; static const int32_t fromUnicodeOffsetsR[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 11, 12, 12, 12, 13, 13, 13, 13, 14, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18 }; testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); } /* * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, * modified according to RFC 2060, * and supplemented with the one example in RFC 2060 itself. */ { static const uint8_t imap[] = { /* Hi Mom -&Jjo--! A&ImIDkQ-. &- &ZeVnLIqe- \ ~peter /mail /&ZeVnLIqe- /&U,BTFw- */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 0x26, 0x2d, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 0x5c, 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d }; static const UChar unicode[] = { /* Hi Mom --! A. & [Japanese word "nihongo"] \ ~peter /mail /<65e5, 672c, 8a9e> /<53f0, 5317> */ 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 0x41, 0x2262, 0x0391, 0x2e, 0x26, 0x65e5, 0x672c, 0x8a9e, 0x5c, 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 0x2f, 0x6d, 0x61, 0x69, 0x6c, 0x2f, 0x65e5, 0x672c, 0x8a9e, 0x2f, 0x53f0, 0x5317 }; static const int32_t toUnicodeOffsets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 15, 17, 19, 24, 25, 28, 30, 33, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 53, 56, 60, 62, 64 }; static const int32_t fromUnicodeOffsets[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 35, 36, 36, 36, 37, 37, 37, 37, 37 }; testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); } /* Test UTF-8 bad data handling*/ { static const uint8_t utf8[]={ 0x61, 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 0x00, 0x62, 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 0xdf, 0xbf, /* 7ff */ 0xbf, /* truncated tail */ 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 0x02 }; static const uint16_t utf8Expected[]={ 0x0061, 0xfffd, 0x0000, 0x0062, 0xfffd, 0xfffd, 0xdbff, 0xdfff, 0x07ff, 0xfffd, 0xfffd, 0x0002 }; static const int32_t utf8Offsets[]={ 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 }; testConvertToU(utf8, sizeof(utf8), utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); } /* Test UTF-32BE bad data handling*/ { static const uint8_t utf32[]={ 0x00, 0x00, 0x00, 0x61, 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 0x00, 0x00, 0x00, 0x62, 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 0x00, 0x00, 0x01, 0x62, 0x00, 0x00, 0x02, 0x62 }; static const uint16_t utf32Expected[]={ 0x0061, 0xfffd, /* 0x110000 out of range */ 0xDBFF, /* 0x10FFFF in range */ 0xDFFF, 0x0062, 0xfffd, /* 0xffffffff out of range */ 0xfffd, /* 0x7fffffff out of range */ 0x0162, 0x0262 }; static const int32_t utf32Offsets[]={ 0, 4, 8, 8, 12, 16, 20, 24, 28 }; testConvertToU(utf32, sizeof(utf32), utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); } /* Test UTF-32LE bad data handling*/ { static const uint8_t utf32[]={ 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 0x62, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 0x62, 0x01, 0x00, 0x00, 0x62, 0x02, 0x00, 0x00, }; static const uint16_t utf32Expected[]={ 0x0061, 0xfffd, /* 0x110000 out of range */ 0xDBFF, /* 0x10FFFF in range */ 0xDFFF, 0x0062, 0xfffd, /* 0xffffffff out of range */ 0xfffd, /* 0x7fffffff out of range */ 0x0162, 0x0262 }; static const int32_t utf32Offsets[]={ 0, 4, 8, 8, 12, 16, 20, 24, 28 }; testConvertToU(utf32, sizeof(utf32), utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); } } static void TestCoverageMBCS(){ #if 0 UErrorCode status = U_ZERO_ERROR; const char *directory = loadTestData(&status); char* tdpath = NULL; char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); int len = strlen(directory); char* index=NULL; tdpath = (char*) malloc(sizeof(char) * (len * 2)); uprv_strcpy(saveDirectory,u_getDataDirectory()); log_verbose("Retrieved data directory %s \n",saveDirectory); uprv_strcpy(tdpath,directory); index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ *(index+1)=0; } u_setDataDirectory(tdpath); log_verbose("ICU data directory is set to: %s \n" ,tdpath); #endif /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm which is test file for MBCS conversion with single-byte codepage data.*/ { /* MBCS with single byte codepage data test1.ucm*/ const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09}; const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd}; int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4, 5}; /*from Unicode*/ testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); /*to Unicode*/ testConvertToU(test1input, sizeof(test1input), expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", fromtest1Offs ,FALSE); } /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm which is test file for MBCS conversion with three-byte codepage data.*/ { /* MBCS with three byte codepage data test3.ucm*/ const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; /*from Unicode*/ testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); /*to Unicode*/ testConvertToU(test3input, sizeof(test3input), expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); } /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm which is test file for MBCS conversion with four-byte codepage data.*/ { /* MBCS with three byte codepage data test4.ucm*/ static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; /*from Unicode*/ testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); /*to Unicode*/ testConvertToU(test4input, sizeof(test4input), expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); } #if 0 free(tdpath); /* restore the original data directory */ log_verbose("Setting the data directory to %s \n", saveDirectory); u_setDataDirectory(saveDirectory); free(saveDirectory); #endif } static void TestConverterType(const char *convName, UConverterType convType) { UConverter* myConverter; UErrorCode err = U_ZERO_ERROR; myConverter = my_ucnv_open(convName, &err); if (U_FAILURE(err)) { log_data_err("Failed to create an %s converter\n", convName); return; } else { if (ucnv_getType(myConverter)!=convType) { log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", convName, convType); } else { log_verbose("ucnv_getType %s ok\n", convName); } } ucnv_close(myConverter); } static void TestConverterTypesAndStarters() { UConverter* myConverter; UErrorCode err = U_ZERO_ERROR; UBool mystarters[256]; /* const UBool expectedKSCstarters[256] = { FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); myConverter = ucnv_open("ksc", &err); if (U_FAILURE(err)) { log_data_err("Failed to create an ibm-ksc converter\n"); return; } else { if (ucnv_getType(myConverter)!=UCNV_MBCS) log_err("ucnv_getType Failed for ibm-949\n"); else log_verbose("ucnv_getType ibm-949 ok\n"); if(myConverter!=NULL) ucnv_getStarters(myConverter, mystarters, &err); /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) log_err("Failed ucnv_getStarters for ksc\n"); else log_verbose("ucnv_getStarters ok\n");*/ } ucnv_close(myConverter); TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); TestConverterType("ibm-878", UCNV_SBCS); TestConverterType("iso-8859-1", UCNV_LATIN_1); TestConverterType("ibm-1208", UCNV_UTF8); TestConverterType("utf-8", UCNV_UTF8); TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); TestConverterType("iso-2022", UCNV_ISO_2022); TestConverterType("hz", UCNV_HZ); TestConverterType("scsu", UCNV_SCSU); TestConverterType("x-iscii-de", UCNV_ISCII); TestConverterType("ascii", UCNV_US_ASCII); TestConverterType("utf-7", UCNV_UTF7); TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); TestConverterType("bocu-1", UCNV_BOCU1); } static void TestAmbiguousConverter(UConverter *cnv) { static const char inBytes[2]={ 0x61, 0x5c }; UChar outUnicode[20]={ 0, 0, 0, 0 }; const char *s; UChar *u; UErrorCode errorCode; UBool isAmbiguous; /* try to convert an 'a' and a US-ASCII backslash */ errorCode=U_ZERO_ERROR; s=inBytes; u=outUnicode; ucnv_toUnicode(cnv, &u, u+20, &s, s+2, NULL, TRUE, &errorCode); if(U_FAILURE(errorCode)) { /* we do not care about general failures in this test; the input may just not be mappable */ return; } if(outUnicode[0]!=0x61 || outUnicode[1]==0xfffd) { /* not an ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ return; } isAmbiguous=ucnv_isAmbiguous(cnv); /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ if((outUnicode[1]!=0x5c)!=isAmbiguous) { log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", ucnv_getName(cnv, &errorCode), outUnicode[1]!=0x5c, isAmbiguous); return; } if(outUnicode[1]!=0x5c) { /* needs fixup, fix it */ ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); if(outUnicode[1]!=0x5c) { /* the fix failed */ log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); return; } } } static void TestAmbiguous() { UErrorCode status = U_ZERO_ERROR; UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; const char target[] = { /* "\\usr\\local\\share\\data\\icutest.txt" */ 0x5c, 0x75, 0x73, 0x72, 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5c, 0x64, 0x61, 0x74, 0x61, 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 0 }; UChar asciiResult[200], sjisResult[200]; int32_t asciiLength = 0, sjisLength = 0, i; const char *name; /* enumerate all converters */ status=U_ZERO_ERROR; for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { cnv=ucnv_open(name, &status); if(U_SUCCESS(status)) { TestAmbiguousConverter(cnv); ucnv_close(cnv); } else { log_err("error: unable to open available converter \"%s\"\n", name); status=U_ZERO_ERROR; } } sjis_cnv = ucnv_open("ibm-943", &status); if (U_FAILURE(status)) { log_data_err("Failed to create a SJIS converter\n"); return; } ascii_cnv = ucnv_open("LATIN-1", &status); if (U_FAILURE(status)) { log_data_err("Failed to create a LATIN-1 converter\n"); ucnv_close(sjis_cnv); return; } /* convert target from SJIS to Unicode */ sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, strlen(target), &status); if (U_FAILURE(status)) { log_err("Failed to convert the SJIS string.\n"); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; } /* convert target from Latin-1 to Unicode */ asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, strlen(target), &status); if (U_FAILURE(status)) { log_err("Failed to convert the Latin-1 string.\n"); free(sjisResult); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; } if (!ucnv_isAmbiguous(sjis_cnv)) { log_err("SJIS converter should contain ambiguous character mappings.\n"); free(sjisResult); free(asciiResult); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; } if (u_strcmp(sjisResult, asciiResult) == 0) { log_err("File separators for SJIS don't need to be fixed.\n"); } ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); if (u_strcmp(sjisResult, asciiResult) != 0) { log_err("Fixing file separator for SJIS failed.\n"); } ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); } static void TestSignatureDetection(){ /* with null terminated strings */ { static const char* data[] = { "\xFE\xFF\x00\x00", /* UTF-16BE */ "\xFF\xFE\x00\x00", /* UTF-16LE */ "\xEF\xBB\xBF\x00", /* UTF-8 */ "\x0E\xFE\xFF\x00", /* SCSU */ "\xFE\xFF", /* UTF-16BE */ "\xFF\xFE", /* UTF-16LE */ "\xEF\xBB\xBF", /* UTF-8 */ "\x0E\xFE\xFF", /* SCSU */ "\xFE\xFF\x41\x42", /* UTF-16BE */ "\xFF\xFE\x41\x41", /* UTF-16LE */ "\xEF\xBB\xBF\x41", /* UTF-8 */ "\x0E\xFE\xFF\x41", /* SCSU */ "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ "\x2B\x2F\x76\x38\x41", /* UTF-7 */ "\x2B\x2F\x76\x39\x41", /* UTF-7 */ "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ "\x2B\x2F\x76\x2F\x41" /* UTF-7 */ }; static const char* expected[] = { "UTF-16BE", "UTF-16LE", "UTF-8", "SCSU", "UTF-16BE", "UTF-16LE", "UTF-8", "SCSU", "UTF-16BE", "UTF-16LE", "UTF-8", "SCSU", "UTF-7", "UTF-7", "UTF-7", "UTF-7", "UTF-7" }; static const int32_t expectedLength[] ={ 2, 2, 3, 3, 2, 2, 3, 3, 2, 2, 3, 3, 5, 4, 4, 4, 4 }; int i=0; UErrorCode err; int32_t signatureLength = -1; const char* source = NULL; const char* enc = NULL; for( ; i */ 2, 0x2d, 1, 0x21, 2, 0x2b, 7, 0x10401 }; const char *cnvName; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-7", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ return; } TestNextUChar(cnv, source, limit, results, "UTF-7"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); cnvName = ucnv_getName(cnv, &errorCode); if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); } ucnv_close(cnv); } void static TestIMAP() { /* test input */ static const uint8_t in[]={ /* H - &Jjo- - ! &- &2AHcAQ- \ */ 0x48, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 0x26, 0x2d, 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 1, 0x48, 1, 0x2d, 4, 0x263a, /* */ 2, 0x2d, 1, 0x21, 2, 0x26, 7, 0x10401 }; const char *cnvName; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ return; } TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); cnvName = ucnv_getName(cnv, &errorCode); if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); } ucnv_close(cnv); } void static TestUTF8() { /* test input */ static const uint8_t in[]={ 0x61, 0xc2, 0x80, 0xe0, 0xa0, 0x80, 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x84, 0x8c, 0xa1, 0xf0, 0x90, 0x90, 0x81 }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 2, 0x80, 3, 0x800, 4, 0x10000, 4, 0x104321, 4, 0x10401 }; /* error test input */ static const uint8_t in2[]={ 0x61, 0xc0, 0x80, /* illegal non-shortest form */ 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 0xc0, 0xc0, /* illegal trail byte */ 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 0xfe, /* illegal byte altogether */ 0x62 }; /* expected error test results */ static const uint32_t results2[]={ /* number of bytes read, code point */ 1, 0x61, 22, 0x62 }; UConverterToUCallback cb; const void *p; const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-8", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-8"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /* test error behavior with a skip callback */ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); source=(const char *)in2; limit=(const char *)(in2+sizeof(in2)); TestNextUChar(cnv, source, limit, results2, "UTF-8"); ucnv_close(cnv); } void static TestCESU8() { /* test input */ static const uint8_t in[]={ 0x61, 0xc2, 0x80, 0xe0, 0xa0, 0x80, 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 0xef, 0xbf, 0xbc }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 2, 0x80, 3, 0x800, 6, 0x10000, 3, 0xdc01, 3, 0xd802, 6, 0x10ffff, 3, 0xfffc }; /* error test input */ static const uint8_t in2[]={ 0x61, 0xc0, 0x80, /* illegal non-shortest form */ 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 0xc0, 0xc0, /* illegal trail byte */ 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 0xfe, /* illegal byte altogether */ 0x62 }; /* expected error test results */ static const uint32_t results2[]={ /* number of bytes read, code point */ 1, 0x61, 34, 0x62 }; UConverterToUCallback cb; const void *p; const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("CESU-8", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "CESU-8"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /* test error behavior with a skip callback */ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); source=(const char *)in2; limit=(const char *)(in2+sizeof(in2)); TestNextUChar(cnv, source, limit, results2, "CESU-8"); ucnv_close(cnv); } void static TestUTF16() { /* test input */ static const uint8_t in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }; static const uint8_t in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }; static const uint8_t in3[]={ 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 }; /* expected test results */ static const uint32_t results1[]={ /* number of bytes read, code point */ 4, 0x4e00, 2, 0xfeff }; static const uint32_t results2[]={ /* number of bytes read, code point */ 4, 0x004e, 2, 0xfffe }; static const uint32_t results3[]={ /* number of bytes read, code point */ 2, 0xfefe, 2, 0x4e00, 2, 0xfeff, 4, 0x20001 }; const char *source, *limit; UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-16", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); return; } source=(const char *)in1, limit=(const char *)in1+sizeof(in1); TestNextUChar(cnv, source, limit, results1, "UTF-16"); source=(const char *)in2, limit=(const char *)in2+sizeof(in2); ucnv_resetToUnicode(cnv); TestNextUChar(cnv, source, limit, results2, "UTF-16"); source=(const char *)in3, limit=(const char *)in3+sizeof(in3); ucnv_resetToUnicode(cnv); TestNextUChar(cnv, source, limit, results3, "UTF-16"); /* Test the condition when source >= sourceLimit */ ucnv_resetToUnicode(cnv); TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); ucnv_close(cnv); } void static TestUTF16BE() { /* test input */ static const uint8_t in[]={ 0x00, 0x61, 0x00, 0xc0, 0x00, 0x31, 0x00, 0xf4, 0xce, 0xfe, 0xd8, 0x01, 0xdc, 0x01 }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 2, 0x61, 2, 0xc0, 2, 0x31, 2, 0xf4, 2, 0xcefe, 4, 0x10401 }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("utf-16be", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-16BE"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0x61}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); } /*Test for the condition where there is a surrogate pair*/ { const uint8_t source2[]={0xd8, 0x01}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); } ucnv_close(cnv); } static void TestUTF16LE() { /* test input */ static const uint8_t in[]={ 0x61, 0x00, 0x31, 0x00, 0x4e, 0x2e, 0x4e, 0x00, 0x01, 0xd8, 0x01, 0xdc }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 2, 0x61, 2, 0x31, 2, 0x2e4e, 2, 0x4e, 4, 0x10401 }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("utf-16le", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-16LE"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0x61}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); } /*Test for the condition where there is a surrogate character*/ { static const uint8_t source2[]={0x01, 0xd8}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); } ucnv_close(cnv); } void static TestUTF32() { /* test input */ static const uint8_t in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff }; static const uint8_t in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 }; static const uint8_t in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }; /* expected test results */ static const uint32_t results1[]={ /* number of bytes read, code point */ 8, 0x100f00, 4, 0xfeff }; static const uint32_t results2[]={ /* number of bytes read, code point */ 8, 0x0f1000, 4, 0xfffe }; static const uint32_t results3[]={ /* number of bytes read, code point */ 4, 0xfefe, 4, 0x100f00, 4, 0xd840, 4, 0xdc01 }; const char *source, *limit; UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-32", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); return; } source=(const char *)in1, limit=(const char *)in1+sizeof(in1); TestNextUChar(cnv, source, limit, results1, "UTF-32"); source=(const char *)in2, limit=(const char *)in2+sizeof(in2); ucnv_resetToUnicode(cnv); TestNextUChar(cnv, source, limit, results2, "UTF-32"); source=(const char *)in3, limit=(const char *)in3+sizeof(in3); ucnv_resetToUnicode(cnv); TestNextUChar(cnv, source, limit, results3, "UTF-32"); /* Test the condition when source >= sourceLimit */ ucnv_resetToUnicode(cnv); TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); ucnv_close(cnv); } static void TestUTF32BE() { /* test input */ static const uint8_t in[]={ 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00, 0xdf, 0xff, 0x00, 0x00, 0xff, 0xfd, 0x00, 0x10, 0xab, 0xcd, 0x00, 0x10, 0xff, 0xff }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 4, 0x61, 4, 0xdc00, 4, 0xd800, 4, 0xdfff, 4, 0xfffd, 4, 0x10abcd, 4, 0x10ffff }; /* error test input */ static const uint8_t in2[]={ 0x00, 0x00, 0x00, 0x61, 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 0x00, 0x00, 0x00, 0x62, 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 0x00, 0x00, 0x01, 0x62, 0x00, 0x00, 0x02, 0x62 }; /* expected error test results */ static const uint32_t results2[]={ /* number of bytes read, code point */ 4, 0x61, 8, 0x62, 12, 0x162, 4, 0x262 }; UConverterToUCallback cb; const void *p; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-32BE"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /* test error behavior with a skip callback */ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); source=(const char *)in2; limit=(const char *)(in2+sizeof(in2)); TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); ucnv_close(cnv); } static void TestUTF32LE() { /* test input */ static const uint8_t in[]={ 0x61, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0xff, 0xdf, 0x00, 0x00, 0xfd, 0xff, 0x00, 0x00, 0xcd, 0xab, 0x10, 0x00, 0xff, 0xff, 0x10, 0x00 }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 4, 0x61, 4, 0xdc00, 4, 0xd800, 4, 0xdfff, 4, 0xfffd, 4, 0x10abcd, 4, 0x10ffff }; /* error test input */ static const uint8_t in2[]={ 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 0x62, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 0x62, 0x01, 0x00, 0x00, 0x62, 0x02, 0x00, 0x00, }; /* expected error test results */ static const uint32_t results2[]={ /* number of bytes read, code point */ 4, 0x61, 8, 0x62, 12, 0x162, 4, 0x262, }; UConverterToUCallback cb; const void *p; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "UTF-32LE"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /* test error behavior with a skip callback */ ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); source=(const char *)in2; limit=(const char *)(in2+sizeof(in2)); TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); ucnv_close(cnv); } static void TestLATIN1() { /* test input */ static const uint8_t in[]={ 0x61, 0x31, 0x32, 0xc0, 0xf0, 0xf4, }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 1, 0x31, 1, 0x32, 1, 0xc0, 1, 0xf0, 1, 0xf4, }; static const uint16_t in1[] = { 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 0xcb, 0x82 }; static const uint8_t out1[] = { 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 0xcb, 0x82 }; static const uint16_t in2[]={ 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x37, 0x20, 0x2A, 0x2F, }; static const unsigned char out2[]={ 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x37, 0x20, 0x2A, 0x2F, }; const char *source=(const char *)in; const char *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("LATIN_1", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "LATIN_1"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); ucnv_close(cnv); } static void TestSBCS() { /* test input */ static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 1, 0xbf, 1, 0xc4, 1, 0x2021, 1, 0xf8ff, 1, 0x00d9 }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("ibm-1281", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a SBCS(ibm-1281) converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "SBCS(ibm-1281)"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for Illegal character */ /* { static const uint8_t input1[]={ 0xA1 }; const char* illegalsource=(const char*)input1; TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); } */ ucnv_close(cnv); } static void TestDBCS() { /* test input */ static const uint8_t in[]={ 0x44, 0x6a, 0xc4, 0x9c, 0x7a, 0x74, 0x46, 0xab, 0x42, 0x5b, }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 2, 0x00a7, 2, 0xe1d2, 2, 0x6962, 2, 0xf842, 2, 0xffe5, }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("ibm-9027", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a DBCS(ibm-9027) converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "DBCS(ibm-9027)"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where we have a truncated char*/ { static const uint8_t source1[]={0xc4}; TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); } /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0x1a, 0x1b}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); } ucnv_close(cnv); } static void TestMBCS() { /* test input */ static const uint8_t in[]={ 0x01, 0xa6, 0xa3, 0x00, 0xa6, 0xa1, 0x08, 0xc2, 0x76, 0xc2, 0x78, }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 1, 0x0001, 2, 0x250c, 1, 0x0000, 2, 0x2500, 1, 0x0008, 2, 0xd60c, 2, 0xd60e, }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("ibm-1363", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where we have a truncated char*/ { static const uint8_t source1[]={0xc4}; TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); } /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0xa1, 0x01}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); } ucnv_close(cnv); } static void TestISO_2022() { /* test input */ static const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc2, 0x80, 0xe0, 0xa0, 0x80, 0xf0, 0x90, 0x80, 0x80 }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 4, 0x0031, 1, 0x0032, 1, 0x61, 2, 0x80, 3, 0x800, 4, 0x10000, }; const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv; cnv=ucnv_open("ISO_2022", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, source, limit, results, "ISO_2022"); /* Test the condition when source >= sourceLimit */ TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where we have a truncated char*/ { static const uint8_t source1[]={0xc4}; TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); } /*Test for the condition where there is an invalid character*/ { static const uint8_t source2[]={0xa1, 0x01}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); } ucnv_close(cnv); } static void TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ const UChar* uSource; const UChar* uSourceLimit; const char* cSource; const char* cSourceLimit; UChar *uTargetLimit =NULL; UChar *uTarget; char *cTarget; const char *cTargetLimit; char *cBuf; UChar *uBuf,*test; int32_t uBufSize = 120; int len=0; int i=2; UErrorCode errorCode=U_ZERO_ERROR; uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); ucnv_reset(cnv); for(;--i>0; ){ uSource = (UChar*) source; uSourceLimit=(const UChar*)sourceLimit; cTarget = cBuf; uTarget = uBuf; cSource = cBuf; cTargetLimit = cBuf; uTargetLimit = uBuf; do{ cTargetLimit = cTargetLimit+ i; ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); if(errorCode==U_BUFFER_OVERFLOW_ERROR){ errorCode=U_ZERO_ERROR; continue; } if(U_FAILURE(errorCode)){ log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); return; } }while (uSource0;){ uSource = (UChar*) source; cTarget = cBuf; uTarget = uBuf; cSource = cBuf; cTargetLimit = cBuf; uTargetLimit = uBuf+uBufSize*5; cTargetLimit = cTargetLimit+uBufSize*10; uSourceLimit=uSource; do{ if (uSourceLimit < sourceLimit) { uSourceLimit = uSourceLimit+1; } ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); if(errorCode==U_BUFFER_OVERFLOW_ERROR){ errorCode=U_ZERO_ERROR; continue; } if(U_FAILURE(errorCode)){ log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); return; } }while (uSource0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } if(srcLen==-1){ srcLen = uprv_strlen(src); } for (; srcIndex0xFFFF){ dst[dstIndex++] = UTF16_LEAD(c); if(dstIndex&@*/ }; const uint16_t expectedISO2022JIS[] = { 0x0041, 0x0042, 0xFF81, 0xFF82, 0x3000 }; int32_t toISO2022JISOffs[]={ 3,4, 8,9, 16 }; const uint8_t sampleTextJIS7[] = { 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 0x1b,0x24,0x42,0x21,0x21, 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 0x21,0x22, 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore &@*/ }; const uint16_t expectedISO2022JIS7[] = { 0x0041, 0x0042, 0xFF81, 0xFF82, 0x3000, 0xFF81, 0xFF82, 0x3001, 0x3000 }; int32_t toISO2022JIS7Offs[]={ 3,4, 8,9, 13,16, 17, 19,27 }; const uint8_t sampleTextJIS8[] = { 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 0xa1,0xc8,0xd9,/*Katakana Set*/ 0x1b,0x28,0x42, 0x41,0x42, 0xb1,0xc3, /*Katakana Set*/ 0x1b,0x24,0x42,0x21,0x21 }; const uint16_t expectedISO2022JIS8[] = { 0x0041, 0x0042, 0xff61, 0xff88, 0xff99, 0x0041, 0x0042, 0xff71, 0xff83, 0x3000 }; int32_t toISO2022JIS8Offs[]={ 3, 4, 5, 6, 7, 11, 12, 13, 14, 18, }; testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); } } static void TestJitterbug915(){ /* tests for roundtripping of the below sequence \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / */ static char cSource[]={ 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x37, 0x20, 0x2A, 0x2F, }; UChar uTarget[500]={'\0'}; UChar* utarget=uTarget; UChar* utargetLimit=uTarget+sizeof(uTarget)/2; char cTarget[500]={'\0'}; char* ctarget=cTarget; char* ctargetLimit=cTarget+sizeof(cTarget); const char* csource=cSource; char* tempSrc = cSource; UErrorCode err=U_ZERO_ERROR; UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); if(U_FAILURE(err)) { log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); return; } ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); if(U_FAILURE(err)) { log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); return; } utargetLimit=utarget; utarget = uTarget; ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); if(U_FAILURE(err)) { log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); return; } ctargetLimit=ctarget; ctarget =cTarget; while(ctarget= sourceLimit */ TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); ucnv_reset(cnv); /*Test for the condition where source > sourcelimit after consuming the shift chracter */ { static const uint8_t source1[]={0x0f}; TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); } /*Test for the condition where there is an invalid character*/ ucnv_reset(cnv); { static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); } ucnv_reset(cnv); source=(const char*)in2; limit=(const char*)in2+sizeof(in2); TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); ucnv_close(cnv); } static void TestGB18030() { /* test input */ static const uint8_t in[]={ 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x35 #if 0 /* * Feature removed markus 2000-oct-26 * Only some codepages must match surrogate pairs into supplementary code points - * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . * GB 18030 provides direct encodings for supplementary code points, therefore * it must not combine two single-encoded surrogates into one code point. */ 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ #endif }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 1, 0x24, 1, 0x7f, 4, 0x80, 2, 0x1f9, 2, 0x20ac, 2, 0x4e00, 4, 0x9fa6, 4, 0xffff, 4, 0x10000, 4, 0x10ffff #if 0 /* Feature removed. See comment above. */ 8, 0x10000 #endif }; /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ UErrorCode errorCode=U_ZERO_ERROR; UConverter *cnv=ucnv_open("gb18030", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); return; } TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); ucnv_close(cnv); } static void TestLMBCS() { /* LMBCS-1 string */ static const uint8_t pszLMBCS[]={ 0x61, 0x01, 0x29, 0x81, 0xA0, 0x0F, 0x27, 0x0F, 0x91, 0x14, 0x0a, 0x74, 0x14, 0xF6, 0x02, 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 0x10, 0x88, 0xA0, }; /* Unicode UChar32 equivalents */ static const UChar32 pszUnicode32[]={ /* code point */ 0x00000061, 0x00002013, 0x000000FC, 0x000000E1, 0x00000007, 0x00000091, 0x00000a74, 0x00000200, 0x00023456, /* code point for surrogate pair */ 0x00005516 }; /* Unicode UChar equivalents */ static const UChar pszUnicode[]={ /* code point */ 0x0061, 0x2013, 0x00FC, 0x00E1, 0x0007, 0x0091, 0x0a74, 0x0200, 0xD84D, /* low surrogate */ 0xDC56, /* high surrogate */ 0x5516 }; /* expected test results */ static const int offsets32[]={ /* number of bytes read, code point */ 0, 1, 3, 4, 5, 7, 9, 12, 15, 21, 24 }; /* expected test results */ static const int offsets[]={ /* number of bytes read, code point */ 0, 1, 3, 4, 5, 7, 9, 12, 15, 18, 21, 24 }; UConverter *cnv; #define NAME_LMBCS_1 "LMBCS-1" #define NAME_LMBCS_2 "LMBCS-2" /* Some basic open/close/property tests on some LMBCS converters */ { char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ char get_subchars [1]; const char * get_name; UConverter *cnv1; UConverter *cnv2; int8_t len = sizeof(get_subchars); UErrorCode errorCode=U_ZERO_ERROR; /* Open */ cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); return; } cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); return; } /* Name */ get_name = ucnv_getName (cnv1, &errorCode); if (strcmp(NAME_LMBCS_1,get_name)){ log_err("Unexpected converter name: %s\n", get_name); } get_name = ucnv_getName (cnv2, &errorCode); if (strcmp(NAME_LMBCS_2,get_name)){ log_err("Unexpected converter name: %s\n", get_name); } /* substitution chars */ ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); if(U_FAILURE(errorCode)) { log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); } if (len!=1){ log_err("Unexpected length of sub chars\n"); } if (get_subchars[0] != expected_subchars[0]){ log_err("Unexpected value of sub chars\n"); } ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); if(U_FAILURE(errorCode)) { log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); } ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); if(U_FAILURE(errorCode)) { log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); } if (len!=1){ log_err("Unexpected length of sub chars\n"); } if (get_subchars[0] != new_subchars[0]){ log_err("Unexpected value of sub chars\n"); } ucnv_close(cnv1); ucnv_close(cnv2); } /* LMBCS to Unicode - offsets */ { UErrorCode errorCode=U_ZERO_ERROR; const uint8_t * pSource = pszLMBCS; const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS); UChar Out [sizeof(pszUnicode) + 1]; UChar * pOut = Out; UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); int32_t off [sizeof(offsets)]; /* last 'offset' in expected results is just the final size. (Makes other tests easier). Compensate here: */ off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ if(U_FAILURE(errorCode)) { log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); return; } ucnv_toUnicode (cnv, &pOut, OutLimit, (const char **)&pSource, (const char *)sourceLimit, off, TRUE, &errorCode); if (memcmp(off,offsets,sizeof(offsets))) { log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); } if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) { log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); } ucnv_close(cnv); } { /* LMBCS to Unicode - getNextUChar */ const char * sourceStart; const char *source=(const char *)pszLMBCS; const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); const UChar32 *results= pszUnicode32; const int *off = offsets32; UErrorCode errorCode=U_ZERO_ERROR; UChar32 uniChar; cnv=ucnv_open("LMBCS-1", &errorCode); if(U_FAILURE(errorCode)) { log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); return; } else { while(source Unicode */ UErrorCode errorCode=U_ZERO_ERROR; const uint8_t * pSource = pszLMBCS; const uint8_t * sourceLimit = pszLMBCS + sizeof(pszLMBCS); int codepointCount = 0; UChar Out [sizeof(pszUnicode) + 1]; UChar * pOut = Out; UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); cnv = ucnv_open(NAME_LMBCS_1, &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); return; } while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) { ucnv_toUnicode (cnv, &pOut, OutLimit, (const char **)&pSource, (const char *)(pSource+1), /* claim that this is a 1- byte buffer */ NULL, FALSE, /* FALSE means there might be more chars in the next buffer */ &errorCode); if (U_SUCCESS (errorCode)) { if ((pSource - (const uint8_t *)pszLMBCS) == offsets [codepointCount+1]) { /* we are on to the next code point: check value */ if (Out[0] != pszUnicode[codepointCount]){ log_err("LMBCS->Uni result %lx should have been %lx \n", Out[0], pszUnicode[codepointCount]); } pOut = Out; /* reset for accumulating next code point */ codepointCount++; } } else { log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); } } { /* limits & surrogate error testing */ uint8_t LIn [sizeof(pszLMBCS)]; const uint8_t * pLIn = LIn; char LOut [sizeof(pszLMBCS)]; char * pLOut = LOut; UChar UOut [sizeof(pszUnicode)]; UChar * pUOut = UOut; UChar UIn [sizeof(pszUnicode)]; const UChar * pUIn = UIn; int32_t off [sizeof(offsets)]; UChar32 uniChar; errorCode=U_ZERO_ERROR; /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode); if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); } errorCode=U_ZERO_ERROR; /* 0 byte source request - no error, no pointer movement */ ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); if(U_FAILURE(errorCode)) { log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); } if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) { log_err("Unexpected pointer move in 0 byte source request \n"); } /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); } if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ { log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); } errorCode = U_ZERO_ERROR; /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ pUIn = pszUnicode; ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) { log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); } errorCode = U_ZERO_ERROR; pLIn = pszLMBCS; ucnv_toUnicode(cnv, &pUOut,pUOut+4,(const char **)&pLIn,(const char *)(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const uint8_t *)pszLMBCS+offsets[4]) { log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); } /* unpaired or chopped LMBCS surrogates */ /* OK high surrogate, Low surrogate is chopped */ LIn [0] = 0x14; LIn [1] = 0xD8; LIn [2] = 0x01; LIn [3] = 0x14; LIn [4] = 0xDC; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) { log_err("Unexpected results on chopped low surrogate\n"); } /* chopped at surrogate boundary */ LIn [0] = 0x14; LIn [1] = 0xD8; LIn [2] = 0x01; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) { log_err("Unexpected results on chopped at surrogate boundary \n"); } /* unpaired surrogate plus valid Unichar */ LIn [0] = 0x14; LIn [1] = 0xD8; LIn [2] = 0x01; LIn [3] = 0x14; LIn [4] = 0xC9; LIn [5] = 0xD0; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) { log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); } /* unpaired surrogate plus chopped Unichar */ LIn [0] = 0x14; LIn [1] = 0xD8; LIn [2] = 0x01; LIn [3] = 0x14; LIn [4] = 0xC9; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) { log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); } /* unpaired surrogate plus valid non-Unichar */ LIn [0] = 0x14; LIn [1] = 0xD8; LIn [2] = 0x01; LIn [3] = 0x0F; LIn [4] = 0x3B; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) { log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); } /* unpaired surrogate plus chopped non-Unichar */ LIn [0] = 0x14; LIn [1] = 0xD8; LIn [2] = 0x01; LIn [3] = 0x0F; pLIn = LIn; errorCode = U_ZERO_ERROR; pUOut = UOut; ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) { log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); } } } ucnv_close(cnv); /* final cleanup */ } static void TestJitterbug255() { const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; const uint8_t *testBuffer = testBytes; const uint8_t *testEnd = testBytes + sizeof(testBytes); UErrorCode status = U_ZERO_ERROR; UChar32 result; UConverter *cnv = 0; cnv = ucnv_open("shift-jis", &status); if (U_FAILURE(status) || cnv == 0) { log_data_err("Failed to open the converter for SJIS.\n"); return; } while (testBuffer != testEnd) { result = ucnv_getNextUChar (cnv, (const char **)&testBuffer, (const char *)testEnd , &status); if (U_FAILURE(status)) { log_err("Failed to convert the next UChar for SJIS.\n"); break; } } ucnv_close(cnv); } static void TestJitterbug792() { #define U_NUM_792_CONVERTERS 3 #define U_MAX_792_TEST_SIZE 21 /* FOR ICU 1.8 we have patched the UCM files. This test is to make sure there are no accidental regressions to the old mappings Some day the patch may be unnecessary, after the IBM repository catches up. */ const char * ConverterNames [U_NUM_792_CONVERTERS] = { "ibm-5351", "ibm-5352", "ibm-5353" }; const uint16_t inChars [U_NUM_792_CONVERTERS][U_MAX_792_TEST_SIZE] = { {0x00A1, 0x00D7, 0x00B8, 0x00F7, 0x00BF, 0x05F3, 0x05F4,0x000}, {0x0679, 0xFB66, 0xFB68, 0x0688, 0xFB88, 0x06A9, 0xFB8E, 0xFB90,0x0691 ,0xFB8C,0x06BA, 0xFB9E,0x06BE, 0xFBAA,0xFBAC,0x06C1, 0xFBA6, 0xFBA8, 0x06D2, 0xFBAE, 0x000}, {0x00A8, 0x02C7, 0x00B8, 0x00AF, 0x02DB, 0x00B4, 0x02D9, 0x000} }; const uint16_t * pInChars; const uint8_t outBytes [U_NUM_792_CONVERTERS][U_MAX_792_TEST_SIZE] = { {0xA1, 0xAA, 0xB8, 0xBA, 0xBF, 0xD7, 0xD8, 0x00}, {0x8A, 0x8A, 0x8A, 0x8F, 0x8F, 0x98, 0x98,0x98,0x9A,0x9A,0x9F,0x9F,0xAA,0xAA,0xAA,0xC0,0xC0,0xC0,0xFF,0xFF, 0x00}, {0x8D, 0x8E, 0x8F, 0x9D, 0x9E, 0xB4, 0xFF, 0x00} }; char outBuffer [U_MAX_792_TEST_SIZE]; UErrorCode status = U_ZERO_ERROR; char * pOutBuffer; UConverter *cnv = 0; int i; for (i=0; i target_cap) ? bytes_needed : target_cap +1; if(numNeeded!=0 && numNeeded!= bytes_needed){ log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); } numNeeded = bytes_needed; } while (status == U_BUFFER_OVERFLOW_ERROR); ucol_close(myCollator); ucnv_close(utf8cnv); } #endif static void TestJitterbug1293(){ UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; char target[256]; UErrorCode status = U_ZERO_ERROR; UConverter* conv=NULL; int32_t target_cap, bytes_needed, numNeeded = 0; conv = ucnv_open("shift-jis",&status); if(U_FAILURE(status)){ log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); return; } do{ target_cap =0; bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; if(numNeeded!=0 && numNeeded!= bytes_needed){ log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); } numNeeded = bytes_needed; } while (status == U_BUFFER_OVERFLOW_ERROR); if(U_FAILURE(status)){ log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); return; } ucnv_close(conv); } #endif