diff --git a/icu4c/source/common/ucnv2022.c b/icu4c/source/common/ucnv2022.c index bd0be6e362..048aca522e 100644 --- a/icu4c/source/common/ucnv2022.c +++ b/icu4c/source/common/ucnv2022.c @@ -74,6 +74,8 @@ */ #endif +static const char SHIFT_IN_STR[] = "\x0F"; +static const char SHIFT_OUT_STR[] = "\x0E"; static const char UCNV_SS2[] = "\x1B\x4E"; static const char UCNV_SS3[] = "\x1B\x4F"; #define UCNV_SS2_LEN 2 @@ -1556,6 +1558,64 @@ getTrail: }/* end while(mySourceIndexg!=0 || pFromU2022State->cs[0]!=ASCII) && + args->flush && source>=sourceLimit && args->converter->fromUChar32==0 + ) { + int32_t sourceIndex; + + outLen = 0; + + if(pFromU2022State->g != 0) { + buffer[outLen++] = UCNV_SI; + pFromU2022State->g = 0; + } + + if(pFromU2022State->cs[0] != ASCII) { + int32_t escLen = escSeqCharsLen[ASCII]; + uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen); + outLen += escLen; + pFromU2022State->cs[0] = (int8_t)ASCII; + } + + /* get the source index of the last input character */ + /* + * TODO this would be simpler and more reliable if we used a pair + * of sourceIndex/prevSourceIndex like in ucnvmbcs.c + * so that we could simply use the prevSourceIndex here; + * this code gives an incorrect result for the rare case of an unmatched + * trail surrogate that is alone in the last buffer of the text stream + */ + sourceIndex=(int32_t)(source-args->source); + if(sourceIndex>0) { + --sourceIndex; + if( U16_IS_TRAIL(args->source[sourceIndex]) && + (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) + ) { + --sourceIndex; + } + } else { + sourceIndex=-1; + } + + ucnv_fromUWriteBytes( + args->converter, + buffer, outLen, + (char **)&target, (const char *)targetLimit, + &offsets, sourceIndex, + err); + } + /*save the state and return */ args->source = source; args->target = (char*)target; @@ -1938,6 +1998,53 @@ getTrail: }/* end while(mySourceIndexflush && source>=sourceLimit && args->converter->fromUChar32==0 + ) { + int32_t sourceIndex; + + /* we are switching to ASCII */ + isTargetByteDBCS=FALSE; + + /* get the source index of the last input character */ + /* + * TODO this would be simpler and more reliable if we used a pair + * of sourceIndex/prevSourceIndex like in ucnvmbcs.c + * so that we could simply use the prevSourceIndex here; + * this code gives an incorrect result for the rare case of an unmatched + * trail surrogate that is alone in the last buffer of the text stream + */ + sourceIndex=(int32_t)(source-args->source); + if(sourceIndex>0) { + --sourceIndex; + if( U16_IS_TRAIL(args->source[sourceIndex]) && + (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) + ) { + --sourceIndex; + } + } else { + sourceIndex=-1; + } + + ucnv_fromUWriteBytes( + args->converter, + SHIFT_IN_STR, 1, + (char **)&target, (const char *)targetLimit, + &offsets, sourceIndex, + err); + } + /*save the state and return */ args->source = source; args->target = (char*)target; @@ -2230,8 +2337,6 @@ getTrailByte: */ /* The following are defined this way to make the strings truely readonly */ -static const char SHIFT_IN_STR[] = "\x0F"; -static const char SHIFT_OUT_STR[] = "\x0E"; static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; @@ -2509,6 +2614,53 @@ getTrail: }/* end while(mySourceIndexg!=0 && + args->flush && source>=sourceLimit && args->converter->fromUChar32==0 + ) { + int32_t sourceIndex; + + /* we are switching to ASCII */ + pFromU2022State->g=0; + + /* get the source index of the last input character */ + /* + * TODO this would be simpler and more reliable if we used a pair + * of sourceIndex/prevSourceIndex like in ucnvmbcs.c + * so that we could simply use the prevSourceIndex here; + * this code gives an incorrect result for the rare case of an unmatched + * trail surrogate that is alone in the last buffer of the text stream + */ + sourceIndex=(int32_t)(source-args->source); + if(sourceIndex>0) { + --sourceIndex; + if( U16_IS_TRAIL(args->source[sourceIndex]) && + (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) + ) { + --sourceIndex; + } + } else { + sourceIndex=-1; + } + + ucnv_fromUWriteBytes( + args->converter, + SHIFT_IN_STR, 1, + (char **)&target, (const char *)targetLimit, + &offsets, sourceIndex, + err); + } + /*save the state and return */ args->source = source; args->target = (char*)target; diff --git a/icu4c/source/test/cintltst/nccbtst.c b/icu4c/source/test/cintltst/nccbtst.c index 1d477874d6..e2acf8437f 100644 --- a/icu4c/source/test/cintltst/nccbtst.c +++ b/icu4c/source/test/cintltst/nccbtst.c @@ -273,14 +273,6 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) }; static const int32_t from_iso_2022_jpOffs [] ={0,2}; - static const UChar iso_2022_jp_inputText1[]={0x3000, 0x00E9, 0x3001, }; - static const uint8_t to_iso_2022_jp1[]={ - 0x1b, 0x24, 0x42, 0x21, 0x21, - 0x21, 0x22, - - }; - static const int32_t from_iso_2022_jpOffs1 [] ={0,0,0,0,0,2,2,}; - /*ISO-2022-JP*/ UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, }; static const uint8_t to_iso_2022_jp2[]={ @@ -427,10 +419,6 @@ static void TestSkip(int32_t inputsize, int32_t outputsize) UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) log_err("u-> iso-2022-jp with skip did not match.\n"); - if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), - to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", - UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs1, NULL, 0 )) - log_err("u-> iso-2022-jp with skip did not match.\n"); /* with context */ if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", @@ -1379,24 +1367,6 @@ static void TestSub(int32_t inputsize, int32_t outputsize) 0x61, 0xe6, 0xca, 0x8a, }; - /*ISO-2022-JP*/ - static const UChar iso_2022_jp_inputText[]={ 0x0041, 0x00E9, 0x0042,0x00E9,0x3000 }; - static const uint8_t to_iso_2022_jp[]={ - 0x41, - 0x1a, - 0x42, - 0x1a, - 0x1b, 0x24, 0x42, 0x21, 0x21, - }; - - static const int32_t from_iso_2022_jpOffs [] ={ - 0, - 1, - 2, - 3, - 4,4,4,4,4 - }; - static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,}; if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), @@ -1413,13 +1383,6 @@ static void TestSub(int32_t inputsize, int32_t outputsize) to_euc_tw, sizeof(to_euc_tw), "euc-tw", UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) log_err("u-> euc-tw with substitute did not match.\n"); - - if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), - to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", - UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_iso_2022_jpOffs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND )) - log_err("u-> iso-2022-jp with substitute did not match.\n"); - - } log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); @@ -1843,22 +1806,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 6, 7, 7, 8, }; /*ISO-2022-JP*/ - static const UChar iso_2022_jp_inputText[]={ 0x0041, 0x00E9, 0x0042,0x00E9,0x3000 }; - static const uint8_t to_iso_2022_jp[]={ - 0x41, - 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, - 0x42, - 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, - 0x1b, 0x24, 0x42, 0x21, 0x21, - }; - - static const int32_t from_iso_2022_jpOffs [] ={ - 0, - 1,1,1,1,1,1, - 2, - 3,3,3,3,3,3, - 4,4,4,4,4 - }; static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ; static const uint8_t to_iso_2022_jp1[]={ 0x1b, 0x24, 0x42, 0x21, 0x21, @@ -1908,39 +1855,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) 1,1,1,1,1,1, 2, }; - static const UChar iso_2022_cn_inputText1[]={ 0x4e00, 0x3712, 0x4e01, }; - static const uint8_t to_iso_2022_cn1[]={ - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, - 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, - 0x0e, 0x36, 0x21, - }; - static const int32_t from_iso_2022_cnOffs1 [] ={ - 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, - }; - static const UChar iso_2022_cn_inputText3[]={ 0x3000, 0x3712, 0x3001, }; - static const uint8_t to_iso_2022_cn3[]={ - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21, - 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, - 0x0e, 0x21, 0x22, - }; - static const int32_t from_iso_2022_cnOffs3 [] ={ - 0,0,0,0,0,0,0, - 1,1,1,1,1,1,1, - 2,2,2 - }; - static const UChar iso_2022_cn_inputText2[]={ 0x0041, 0x3712, 0x4e00, }; - static const uint8_t to_iso_2022_cn2[]={ - 0x41, - 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, - 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, - }; - static const int32_t from_iso_2022_cnOffs2 [] ={ - 0, - 1,1,1,1,1,1, - 2,2,2,2,2,2,2 - }; static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042}; @@ -2095,11 +2009,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) log_err("u-> euc-tw with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]), - to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", - UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0 )) - log_err("u-> iso_2022_jp with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]), to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) @@ -2252,18 +2161,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize) UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) log_err("u-> iso_2022_cn with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), - to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", - UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs1, NULL, 0 )) - log_err("u-> iso_2022_cn with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_cn_inputText2, sizeof(iso_2022_cn_inputText2)/sizeof(iso_2022_cn_inputText2[0]), - to_iso_2022_cn2, sizeof(to_iso_2022_cn2), "iso-2022-cn", - UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs2, NULL, 0 )) - log_err("u-> iso_2022_cn with subst with value did not match.\n"); - if(!testConvertFromUnicode(iso_2022_cn_inputText3, sizeof(iso_2022_cn_inputText3)/sizeof(iso_2022_cn_inputText3[0]), - to_iso_2022_cn3, sizeof(to_iso_2022_cn3), "iso-2022-cn", - UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs3, NULL, 0 )) - log_err("u-> iso_2022_cn with subst with value did not match.\n"); if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) diff --git a/icu4c/source/test/cintltst/ncnvtst.c b/icu4c/source/test/cintltst/ncnvtst.c index b16174ca3b..ff687aeb61 100644 --- a/icu4c/source/test/cintltst/ncnvtst.c +++ b/icu4c/source/test/cintltst/ncnvtst.c @@ -438,10 +438,6 @@ static void TestErrorBehaviour(){ static const uint8_t expected2[] = { 0x31,0x1A,0x32}; static const int32_t offsets2[] = { 0x00,0x01,0x02}; - static const UChar sampleText3MBCS[] = { 0x3000, 0x0050, 0xdc01,0x3001}; - static const uint8_t expected3MBCS[] = { 0x1B, 0x24, 0x42, 0x21, 0x21, 0x1B, 0x28, 0x42, 0x50, 0x1A, 0x1B, 0x24, 0x42, 0x21, 0x22,}; - static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03,}; - static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a}; static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 }; @@ -462,13 +458,6 @@ static void TestErrorBehaviour(){ expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), - expected3MBCS, sizeof(expected3MBCS), "iso-2022-jp", offsets3MBCS, TRUE, U_ZERO_ERROR)) - log_err("u->iso-2022-jp [UCNV_MBCS] \n"); - if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), - expected3MBCS, sizeof(expected3MBCS), "iso-2022-jp", offsets3MBCS, FALSE, U_ZERO_ERROR)) - log_err("u-> iso-2022-jp[UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); @@ -542,13 +531,6 @@ static void TestErrorBehaviour(){ static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A }; static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 }; - static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01,0x4e00}; - static const uint8_t expected4MBCS[] = { 0x1b, 0x24, 0x29, 0x43, - 0x61, - 0x0e, 0x6c, 0x69, - 0x0f, 0x1a, - 0x0e, 0x6c, 0x69,}; - static const int32_t offsets4MBCS[] = { -1, -1, -1, -1, 0x00, 0x01 ,0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03 }; if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR)) log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); @@ -572,13 +554,6 @@ static void TestErrorBehaviour(){ if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR)) log_err("u-> iso-2022-kr[UCNV_MBCS] \n"); - - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), - expected4MBCS, sizeof(expected4MBCS), "iso-2022-kr", offsets4MBCS, TRUE, U_ZERO_ERROR)) - log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); - if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), - expected4MBCS, sizeof(expected4MBCS), "iso-2022-kr", offsets4MBCS, FALSE, U_ZERO_ERROR)) - log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); } /*HZ*/ diff --git a/icu4c/source/test/cintltst/nucnvtst.c b/icu4c/source/test/cintltst/nucnvtst.c index aef60a0fba..530b4b943e 100644 --- a/icu4c/source/test/cintltst/nucnvtst.c +++ b/icu4c/source/test/cintltst/nucnvtst.c @@ -3983,96 +3983,7 @@ static void TestJitterbug2411(){ static void TestJIS(){ - /* From Unicode */ - { - /* JIS Encoding */ - UChar sampleTextJIS[] ={ - 0xFF81, 0xFF82, - 0x30EC, 0x30ED, - 0x30EE, 0x30EF, - 0xFF93, 0xFF94, - 0xFF95, 0xFF96, - 0xFF97, 0xFF98 - }; - const uint8_t expectedISO2022JIS[] ={ - 0x1b, 0x24, 0x42, - 0x25, 0x41, 0x25, 0x44, - 0x25, 0x6c, 0x25, 0x6d, - 0x25, 0x6e, 0x25, 0x6F, - 0x25, 0x62, 0x25, 0x64, - 0x25, 0x66, 0x25, 0x68, - 0x25, 0x69, 0x25, 0x6a - - }; - int32_t fmISO2022JISOffs[] ={ - 0,0,0, - 0,0,1,1, - 2,2,3,3, - 4,4,5,5, - 6,6,7,7, - 8,8,9,9, - 10,10,11,11 - - }; - - /* JIS7 Encoding */ - const uint8_t expectedISO2022JIS7[] ={ - 0x0e, - 0x41, 0x42, - 0x0f, 0x1b, 0x24, 0x42, - 0x25, 0x6c, 0x25, 0x6d, - 0x25, 0x6e, 0x25, 0x6F, - 0x0e, - 0x53, 0x54, - 0x55, 0x56, - 0x57, 0x58 - - }; - int32_t fmISO2022JIS7Offs[] ={ - 0, - 0,1, - 2,2,2,2, - 2,2,3,3, - 4,4,5,5, - 6, - 6,7, - 8,9, - 10,11 - - }; - - /* JIS8 Encoding */ - const uint8_t expectedISO2022JIS8[] ={ - 0xC1, 0xC2, - 0x1b, 0x24, 0x42, - 0x25, 0x6c, 0x25, 0x6d, - 0x25, 0x6e, 0x25, 0x6F, - 0x1b, 0x28, 0x4A, - 0xD3, 0xD4, - 0xD5, 0xD6, - 0xD7, 0xD8 - - }; - int32_t fmISO2022JIS8Offs[] ={ - 0,1, - 2,2,2, - 2,2,3,3, - 4,4,5,5, - 6,6,6, - 6,7, - 8,9, - 10,11 - - }; - testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]), - expectedISO2022JIS, sizeof(expectedISO2022JIS), "JIS", fmISO2022JISOffs,TRUE ); - testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]), - expectedISO2022JIS7, sizeof(expectedISO2022JIS7), "JIS7", fmISO2022JIS7Offs,FALSE ); - testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]), - expectedISO2022JIS8, sizeof(expectedISO2022JIS8), "JIS8", fmISO2022JIS8Offs,FALSE ); - - - } + /* From Unicode moved to testdata/conversion.txt */ /*To Unicode*/ { const uint8_t sampleTextJIS[] = { diff --git a/icu4c/source/test/testdata/conversion.txt b/icu4c/source/test/testdata/conversion.txt index 7dd962b9a6..58604e6d7d 100644 --- a/icu4c/source/test/testdata/conversion.txt +++ b/icu4c/source/test/testdata/conversion.txt @@ -420,6 +420,105 @@ conversion { fromUnicode { Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" } Cases { + // moved from cintltst /tsconv/nccbtst/TestSkipCallBack + { + "iso-2022-jp", + "\u3000\xe9\u3001", + :bin{ 1b2442212121221b2842 }, + :intvector{ 0,0,0,0,0,2,2,2,2,2 }, + :int{1}, :int{1}, "", "0", "" + } + // moved from cintltst /tsconv/nccbtst/TestSubCallBack + { + "iso-2022-jp", + "A\xe9B\xe9\u3000", + :bin{ 411a421a1b244221211b2842 }, + :intvector{ 0,1,2,3,4,4,4,4,4,4,4,4 }, + :int{1}, :int{1}, "", "?", "" + } + // moved from cintltst /tsconv/nccbtst/TestSubWithValueCallBack + { + "iso-2022-jp", + "A\xe9B\xe9\u3000", + :bin{ 41255530304539422555303045391b244221211b2842 }, + :intvector{ 0,1,1,1,1,1,1,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4 }, + :int{1}, :int{1}, "", "&", "" + } + { + "iso-2022-cn", + "\u4e00\u3712\u4e01", + :bin{ 1b2429410e523b0f2555333731320e36210f }, + :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2 }, + :int{1}, :int{1}, "", "&", "" + } + { + "iso-2022-cn", + "A\u3712\u4e00", + :bin{ 412555333731321b2429410e523b0f }, + :intvector{ 0,1,1,1,1,1,1,2,2,2,2,2,2,2,2 }, + :int{1}, :int{1}, "", "&", "" + } + { + "iso-2022-cn", + "\u3000\u3712\u3001", + :bin{ 1b2429410e21210f2555333731320e21220f }, + :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2 }, + :int{1}, :int{1}, "", "&", "" + } + + // moved from cintltst /tsconv/nucnvtst/TestJIS + { + "JIS", + "\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98", + :bin{ 1b244225412544256c256d256e256F25622564256625682569256a1b2842 }, + :intvector{ 0,0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,11,11,11 }, + :int{1}, :int{1}, "", "?", "" + } + { + "JIS7", + "\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98", + :bin{ 0e41420f1b2442256c256d256e256F0e5354555657580f1b2842 }, + :intvector{ 0,0,1,2,2,2,2,2,2,3,3,4,4,5,5,6,6,7,8,9,10,11,11,11,11,11 }, + :int{1}, :int{1}, "", "?", "" + } + { + "JIS8", + "\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98", + :bin{ C1C21b2442256c256d256e256F1b284AD3D4D5D6D7D81b2842 }, + :intvector{ 0,1,2,2,2,2,2,3,3,4,4,5,5,6,6,6,6,7,8,9,10,11,11,11,11 }, + :int{1}, :int{1}, "", "?", "" + } + + // moved from cintltst /tsconv/ncnvtst/TestErrorBehaviour + { + "iso-2022-jp", + "\u3000\x50\udc01\u3001", + :bin{ 1B244221211B2842501A1B24422122 }, + :intvector{ 0,0,0,0,0,1,1,1,1,2,3,3,3,3,3 }, + :int{0}, :int{1}, "", "?", "\udc01" + } + { + "iso-2022-jp", + "\u3000\x50\udc01\u3001", + :bin{ 1B244221211B2842501A1B244221221b2842 }, + :intvector{ 0,0,0,0,0,1,1,1,1,2,3,3,3,3,3,3,3,3 }, + :int{1}, :int{1}, "", "?", "" + } + { + "iso-2022-kr", + "\x61\u4e00\udc01\u4e00", + :bin{ 1b242943610e6c690f1a0e6c69 }, + :intvector{ -1,-1,-1,-1,0,1,1,1,2,2,3,3,3 }, + :int{0}, :int{1}, "", "?", "\udc01" + } + { + "iso-2022-kr", + "\x61\u4e00\udc01\u4e00", + :bin{ 1b242943610e6c690f1a0e6c690f }, + :intvector{ -1,-1,-1,-1,0,1,1,1,2,2,3,3,3,3 }, + :int{1}, :int{1}, "", "?", "" + } + // ISO-2022-KR { "ibm-25546", @@ -442,6 +541,13 @@ conversion { :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,6,6 }, :int{1}, :int{1}, "", "?", "" } + { + "ibm-25546", + "AB\uc88b\U00050005\uaccc", + :bin{ 1b24294341420e41412f7e306a0f }, + :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5 }, + :int{1}, :int{1}, "", "?", "" + } { "ISO-2022-KR", "AB\uc88b\U00050005\uacccC", @@ -463,13 +569,20 @@ conversion { :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,6,6 }, :int{1}, :int{1}, "", "?\x00\x2f\x7e", "" } + { + "ISO-2022-KR", + "AB\uc88b\U00050005\uaccc", + :bin{ 1b24294341420e41412f7e306a0f }, + :intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5 }, + :int{1}, :int{1}, "", "?\x00\x2f\x7e", "" + } // ISO-2022-JP-2 with G2 designator & SS2 shift { "ISO-2022-JP-2", "CF\u758f\u038f\u7591", - :bin{ 43461b244241411b2e461b4e3f353f }, - :intvector{ 0,1,2,2,2,2,2,3,3,3,3,3,3,4,4 }, + :bin{ 43461b244241411b2e461b4e3f353f1b2842 }, + :intvector{ 0,1,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4 }, :int{1}, :int{1}, "", ".", "" } // JIS7 with Katakana @@ -480,12 +593,20 @@ conversion { :intvector{ 0,1,2,2,3,4,4,5 }, :int{1}, :int{1}, "", ".", "" } + // JIS7 with shift to ASCII at the very end + { + "JIS7", + "AB\uff81\uff82", + :bin{ 41420e41420f }, + :intvector{ 0,1,2,2,3,3 }, + :int{1}, :int{1}, "", ".", "" + } // JIS8 with Katakana { "JIS8", "A\uff81\\\xa5\uff82B", - :bin{ 41c15c1b284a5cc242 }, - :intvector{ 0,1,2,3,3,3,3,4,5 }, + :bin{ 41c15c1b284a5cc2421b2842 }, + :intvector{ 0,1,2,3,3,3,3,4,5,5,5,5 }, :int{1}, :int{1}, "", ".", "" } @@ -497,6 +618,22 @@ conversion { :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4,6,6 }, :int{1}, :int{1}, "", ".", "" } + // ISO-2022-CN-EXT with shift to ASCII at the very end + { + "ISO-2022-CN-EXT", + "\u4eae\u9f82\u56cd\u56cc\U0002a6d6", + :bin{ 1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c0f }, + :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4 }, + :int{1}, :int{1}, "", ".", "" + } + // ISO-2022-CN-EXT without flush so do not shift to ASCII at the very end + { + "ISO-2022-CN-EXT", + "\u4eae\u9f82\u56cd\u56cc\U0002a6d6", + :bin{ 1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c }, + :intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4 }, + :int{0}, :int{1}, "", ".", "" + } // windows-936 vs. ibm-1386 {