/******************************************************************** * COPYRIGHT: * Copyright (c) 1997-1999, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /******************************************************************************** * * File CCONVTST.C * * Modification History: * Name Description * Steven R. Loomis 7/8/1999 Adding input buffer test ********************************************************************************* */ #include #include #include #include #include "unicode/uloc.h" #include "unicode/ucnv.h" #include "unicode/ucnv_err.h" #include "cintltst.h" #include "unicode/utypes.h" #include "unicode/ustring.h" static void printSeq(const unsigned char* a, int len); static void printUSeq(const UChar* a, int len); void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; void TestConverterTypesAndStarters(void); void TestAmbiguous(void); void TestUTF8(void); void TestLMBCS(void); void TestJitterbug255(void); void TestEBCDICUS4XML(void); #define NEW_MAX_BUFFER 999 static int32_t gInBufferSize = 0; static int32_t gOutBufferSize = 0; static char gNuConvTestName[1024]; #define nct_min(x,y) ((x %d chars out]. \nResult :", sourceLen, targ-junkout); if(VERBOSITY) { char junk[9999]; char offset_str[9999]; char *p; junk[0] = 0; offset_str[0] = 0; for(p = junkout;p %d chars.\nResult :", sourcelen, targ-junkout); if(VERBOSITY) { char junk[9999]; char offset_str[9999]; UChar *p; junk[0] = 0; offset_str[0] = 0; for(p = junkout;p h1 h2 h3 . */ const char expectedIBM930[] = { (char)0xF1, (char)0xF2, (char)0xF3, (char)0x0E, (char)0x45, (char)0x41, (char)0x45, (char)0x42, (char)0x45, (char)0x43, (char)0x0F, (char)0x4B }; int32_t toIBM930Offs[] = { (char)0x00, (char)0x01, (char)0x02, (char)0x03, (char)0x03, (char)0x03, (char)0x04, (char)0x04, (char)0x05, (char)0x05, (char)0x06, (char)0x06, }; /* 1 2 3 . */ const char expectedISO88593[] = { (char)0x31, (char)0x32, (char)0x33, (char)0x1a, (char)0x1a, (char)0x1a, (char)0x2E }; int32_t toISO88593Offs[] = {(char) 0x00, (char)0x01, (char)0x02, (char)0x03, (char)0x04, (char)0x05, (char)0x06, }; /* 1 2 3 h1 h2 h3 . */ const char expectedIBM943[] = { (char)0x31, (char)0x32, (char)0x33, (char)0x88, (char)0xea, (char)0x93, (char)0xf1, (char)0x8e, (char)0x4f, (char)0x2e }; int32_t toIBM943Offs [] = { (char)0x00, (char)0x01, (char)0x02, (char)0x03, (char)0x03, (char)0x04, (char)0x04, (char)0x05, (char)0x05, (char)0x06, }; /* etc */ const char expectedUTF16LE[] = { (char)0x31, (char)0x00, (char)0x32, (char)0x00, (char)0x33, (char)0x00, (char)0x00, (char)0x4e, (char)0x8c, (char)0x4e, (char)0x09, (char)0x4e, (char)0x2e, (char)0x00 }; int32_t toUTF16LEOffs[]= { (char)0x00, (char)0x00, (char)0x02, (char)0x02, (char)0x04, (char)0x04, (char)0x06, (char)0x06, (char)0x08, (char)0x08, (char)0x0a, (char)0x0a, (char)0x0c, (char)0x0c, }; /** Test chars #2 NOT USED YET**/ /* Sahha [health], slashed h's */ const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; const char expectedMaltese913[] = { (char)0x53, (char)0x61, (char)0xB1, (char)0xB1, (char)0x61 }; /*********************************** START OF CODE finally *************/ gInBufferSize = insize; gOutBufferSize = outsize; log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); #if 0 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs )) log_err("u-> UTF8 did not match.\n"); if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedISO2022, sizeof(expectedISO2022), "iso-2022", toISO2022Offs )) log_err("u-> iso-2022 did not match.\n"); if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs )) log_err("u-> ibm-930 did not match.\n"); if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs )) log_err("u-> iso-8859-3 did not match.\n"); if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs )) log_err("u-> ibm-943 [UCNV_MBCS] not match.\n"); if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs )) log_err("u-> utf-16le did not match.\n"); /****/ #endif #if 0 if(!testConvertToU(expectedUTF8, sizeof(expectedUTF8), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs )) log_err("utf8 -> u did not match\n"); if(!testConvertToU(expectedISO2022, sizeof(expectedISO2022), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "iso-2022", fmISO2022Offs )) log_err("iso-2022 -> u did not match"); #endif #if 0 if(!testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-930", fmIBM930Offs )) log_err("ibm-930 -> u did not match"); if(!testConvertToU(expectedIBM943, sizeof(expectedIBM943), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ibm-943", fmIBM943Offs )) log_err("ibm-943 -> u did not match"); if(!testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs )) log_err("utf-16le -> u did not match"); #endif if(!testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL)) log_err("latin3[813] -> u did not match\n"); if(!testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL )) log_err("u-> latin3[813] did not match.\n"); } void TestConverterTypesAndStarters() { UConverter* myConverter[3]; UErrorCode err = U_ZERO_ERROR; UBool mystarters[256]; const UBool expectedKSCstarters[256] = { FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE}; log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); myConverter[0] = ucnv_open("ksc", &err); if (U_FAILURE(err)) log_err("Failed to create an ibm-949 converter\n"); else { if (ucnv_getType(myConverter[0])!=UCNV_MBCS) log_err("ucnv_getType Failed for ibm-949\n"); else log_verbose("ucnv_getType ibm-949 ok\n"); if(myConverter[0]!=NULL) ucnv_getStarters(myConverter[0], mystarters, &err); /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) log_err("Failed ucnv_getStarters for ksc\n"); else log_verbose("ucnv_getStarters ok\n");*/ } myConverter[1] = ucnv_open("ibm-930", &err); if (U_FAILURE(err)) log_err("Failed to create an ibm-930 converter\n"); else { if (ucnv_getType(myConverter[1])!=UCNV_EBCDIC_STATEFUL) log_err("ucnv_getType Failed for ibm-930\n"); else log_verbose("ucnv_getType ibm-930 ok\n"); } myConverter[2] = ucnv_open("ibm-878", &err); if (U_FAILURE(err)) log_err("Failed to create an ibm-815 converter\n"); else { if (ucnv_getType(myConverter[2])!=UCNV_SBCS) log_err("ucnv_getType Failed for ibm-815\n"); else log_verbose("ucnv_getType ibm-815 ok\n"); } ucnv_close(myConverter[0]); ucnv_close(myConverter[1]); ucnv_close(myConverter[2]); } void TestAmbiguous() { UErrorCode status = U_ZERO_ERROR; UConverter *ascii_cnv = 0, *sjis_cnv = 0; const char target[] = { /* "\\usr\\local\\share\\data\\icutest.txt" */ 0x5c, 0x75, 0x73, 0x72, 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 0x5c, 0x64, 0x61, 0x74, 0x61, 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 0 }; UChar *asciiResult = 0, *sjisResult = 0; int32_t asciiLength = 0, sjisLength = 0; sjis_cnv = ucnv_open("SJIS", &status); if (U_FAILURE(status)) { log_err("Failed to create a SJIS converter\n"); return; } ascii_cnv = ucnv_open("LATIN-1", &status); if (U_FAILURE(status)) { log_err("Failed to create a SJIS converter\n"); ucnv_close(sjis_cnv); return; } /* convert target from SJIS to Unicode */ sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, 0, target, strlen(target), &status); status = U_ZERO_ERROR; sjisResult = (UChar*)malloc(sizeof(UChar)* sjisLength); ucnv_toUChars(sjis_cnv, sjisResult, sjisLength, target, strlen(target), &status); if (U_FAILURE(status)) { log_err("Failed to convert the SJIS string.\n"); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; } /* convert target from Latin-1 to Unicode */ asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, 0, target, strlen(target), &status); status = U_ZERO_ERROR; asciiResult = (UChar*)malloc(sizeof(UChar)* asciiLength); ucnv_toUChars(ascii_cnv, asciiResult, asciiLength, target, strlen(target), &status); if (U_FAILURE(status)) { log_err("Failed to convert the Latin-1 string.\n"); free(sjisResult); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; } if (!ucnv_isAmbiguous(sjis_cnv)) { log_err("SJIS converter should contain ambiguous character mappings.\n"); free(sjisResult); free(asciiResult); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); return; } if (u_strcmp(sjisResult, asciiResult) == 0) { log_err("File separators for SJIS don't need to be fixed.\n"); } ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); if (u_strcmp(sjisResult, asciiResult) != 0) { log_err("Fixing file separator for SJIS failed.\n"); } free(sjisResult); free(asciiResult); ucnv_close(sjis_cnv); ucnv_close(ascii_cnv); } void TestUTF8() { /* test input */ static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80, 0xf4, 0x84, 0x8c, 0xa1 }; /* expected test results */ static const uint32_t results[]={ /* number of bytes read, code point */ 1, 0x61, 2, 0, 3, 0, 4, 0, 4, 0x104321 }; const char *s=(const char *)in, *s0, *limit=(const char *)in+sizeof(in); const uint32_t *r=results; UErrorCode errorCode=U_ZERO_ERROR; uint32_t c; UConverter *cnv=ucnv_open("UTF-8", &errorCode); if(U_FAILURE(errorCode)) { log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); } while(s