ICU-3179 make ISO-2022-xx return to ASCII mode at the very end of the text

X-SVN-Rev: 14071
This commit is contained in:
Markus Scherer 2003-12-10 02:03:42 +00:00
parent be25c30571
commit 07e5fd82bb
5 changed files with 296 additions and 224 deletions

View File

@ -74,6 +74,8 @@
*/
#endif
/*
 * Shift and single-shift byte sequences, kept as const arrays so that they
 * can be handed directly to byte-writing helpers.
 */
/* SI (0x0F): shift in; written with length 1 to return to ASCII at the end of the text */
static const char SHIFT_IN_STR[] = "\x0F";
/* SO (0x0E): shift out */
static const char SHIFT_OUT_STR[] = "\x0E";
/* ESC N (0x1B 0x4E): 7-bit form of single shift two (SS2) */
static const char UCNV_SS2[] = "\x1B\x4E";
/* ESC O (0x1B 0x4F): 7-bit form of single shift three (SS3) */
static const char UCNV_SS3[] = "\x1B\x4F";
/* number of bytes in the UCNV_SS2 sequence, not counting the terminating NUL */
#define UCNV_SS2_LEN 2
@ -1556,6 +1558,64 @@ getTrail:
}/* end while(mySourceIndex<mySourceLength) */
/*
* the end of the input stream and detection of truncated input
* are handled by the framework, but for ISO-2022-JP conversion
* we need to be in ASCII mode at the very end
*
* conditions:
* successful
* in SO mode or not in ASCII mode
* end of input and no truncated input
*/
if( U_SUCCESS(*err) &&
(pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&
args->flush && source>=sourceLimit && args->converter->fromUChar32==0
) {
int32_t sourceIndex;
outLen = 0;
if(pFromU2022State->g != 0) {
buffer[outLen++] = UCNV_SI;
pFromU2022State->g = 0;
}
if(pFromU2022State->cs[0] != ASCII) {
int32_t escLen = escSeqCharsLen[ASCII];
uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);
outLen += escLen;
pFromU2022State->cs[0] = (int8_t)ASCII;
}
/* get the source index of the last input character */
/*
* TODO this would be simpler and more reliable if we used a pair
* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
* so that we could simply use the prevSourceIndex here;
* this code gives an incorrect result for the rare case of an unmatched
* trail surrogate that is alone in the last buffer of the text stream
*/
sourceIndex=(int32_t)(source-args->source);
if(sourceIndex>0) {
--sourceIndex;
if( U16_IS_TRAIL(args->source[sourceIndex]) &&
(sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
) {
--sourceIndex;
}
} else {
sourceIndex=-1;
}
ucnv_fromUWriteBytes(
args->converter,
buffer, outLen,
(char **)&target, (const char *)targetLimit,
&offsets, sourceIndex,
err);
}
/*save the state and return */
args->source = source;
args->target = (char*)target;
@ -1938,6 +1998,53 @@ getTrail:
}/* end while(mySourceIndex<mySourceLength) */
/*
* the end of the input stream and detection of truncated input
* are handled by the framework, but for ISO-2022-KR conversion
* we need to be in ASCII mode at the very end
*
* conditions:
* successful
* not in ASCII mode
* end of input and no truncated input
*/
if( U_SUCCESS(*err) &&
isTargetByteDBCS &&
args->flush && source>=sourceLimit && args->converter->fromUChar32==0
) {
int32_t sourceIndex;
/* we are switching to ASCII */
isTargetByteDBCS=FALSE;
/* get the source index of the last input character */
/*
* TODO this would be simpler and more reliable if we used a pair
* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
* so that we could simply use the prevSourceIndex here;
* this code gives an incorrect result for the rare case of an unmatched
* trail surrogate that is alone in the last buffer of the text stream
*/
sourceIndex=(int32_t)(source-args->source);
if(sourceIndex>0) {
--sourceIndex;
if( U16_IS_TRAIL(args->source[sourceIndex]) &&
(sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
) {
--sourceIndex;
}
} else {
sourceIndex=-1;
}
ucnv_fromUWriteBytes(
args->converter,
SHIFT_IN_STR, 1,
(char **)&target, (const char *)targetLimit,
&offsets, sourceIndex,
err);
}
/*save the state and return */
args->source = source;
args->target = (char*)target;
@ -2230,8 +2337,6 @@ getTrailByte:
*/
/* The following are defined this way to make the strings truly read-only */
static const char SHIFT_IN_STR[] = "\x0F";
static const char SHIFT_OUT_STR[] = "\x0E";
static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";
static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";
static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47";
@ -2509,6 +2614,53 @@ getTrail:
}/* end while(mySourceIndex<mySourceLength) */
/*
* the end of the input stream and detection of truncated input
* are handled by the framework, but for ISO-2022-CN conversion
* we need to be in ASCII mode at the very end
*
* conditions:
* successful
* not in ASCII mode
* end of input and no truncated input
*/
if( U_SUCCESS(*err) &&
pFromU2022State->g!=0 &&
args->flush && source>=sourceLimit && args->converter->fromUChar32==0
) {
int32_t sourceIndex;
/* we are switching to ASCII */
pFromU2022State->g=0;
/* get the source index of the last input character */
/*
* TODO this would be simpler and more reliable if we used a pair
* of sourceIndex/prevSourceIndex like in ucnvmbcs.c
* so that we could simply use the prevSourceIndex here;
* this code gives an incorrect result for the rare case of an unmatched
* trail surrogate that is alone in the last buffer of the text stream
*/
sourceIndex=(int32_t)(source-args->source);
if(sourceIndex>0) {
--sourceIndex;
if( U16_IS_TRAIL(args->source[sourceIndex]) &&
(sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))
) {
--sourceIndex;
}
} else {
sourceIndex=-1;
}
ucnv_fromUWriteBytes(
args->converter,
SHIFT_IN_STR, 1,
(char **)&target, (const char *)targetLimit,
&offsets, sourceIndex,
err);
}
/*save the state and return */
args->source = source;
args->target = (char*)target;

View File

@ -273,14 +273,6 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
};
static const int32_t from_iso_2022_jpOffs [] ={0,2};
static const UChar iso_2022_jp_inputText1[]={0x3000, 0x00E9, 0x3001, };
static const uint8_t to_iso_2022_jp1[]={
0x1b, 0x24, 0x42, 0x21, 0x21,
0x21, 0x22,
};
static const int32_t from_iso_2022_jpOffs1 [] ={0,0,0,0,0,2,2,};
/*ISO-2022-JP*/
UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
static const uint8_t to_iso_2022_jp2[]={
@ -427,10 +419,6 @@ static void TestSkip(int32_t inputsize, int32_t outputsize)
UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
log_err("u-> iso-2022-jp with skip did not match.\n");
if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs1, NULL, 0 ))
log_err("u-> iso-2022-jp with skip did not match.\n");
/* with context */
if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
@ -1379,24 +1367,6 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
0x61, 0xe6, 0xca, 0x8a,
};
/*ISO-2022-JP*/
static const UChar iso_2022_jp_inputText[]={ 0x0041, 0x00E9, 0x0042,0x00E9,0x3000 };
static const uint8_t to_iso_2022_jp[]={
0x41,
0x1a,
0x42,
0x1a,
0x1b, 0x24, 0x42, 0x21, 0x21,
};
static const int32_t from_iso_2022_jpOffs [] ={
0,
1,
2,
3,
4,4,4,4,4
};
static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
@ -1413,13 +1383,6 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
to_euc_tw, sizeof(to_euc_tw), "euc-tw",
UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
log_err("u-> euc-tw with substitute did not match.\n");
if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_iso_2022_jpOffs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
log_err("u-> iso-2022-jp with substitute did not match.\n");
}
log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
@ -1843,22 +1806,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
6, 7, 7, 8,
};
/*ISO-2022-JP*/
static const UChar iso_2022_jp_inputText[]={ 0x0041, 0x00E9, 0x0042,0x00E9,0x3000 };
static const uint8_t to_iso_2022_jp[]={
0x41,
0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
0x42,
0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
0x1b, 0x24, 0x42, 0x21, 0x21,
};
static const int32_t from_iso_2022_jpOffs [] ={
0,
1,1,1,1,1,1,
2,
3,3,3,3,3,3,
4,4,4,4,4
};
static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
static const uint8_t to_iso_2022_jp1[]={
0x1b, 0x24, 0x42, 0x21, 0x21,
@ -1908,39 +1855,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1,1,1,1,1,1,
2,
};
static const UChar iso_2022_cn_inputText1[]={ 0x4e00, 0x3712, 0x4e01, };
static const uint8_t to_iso_2022_cn1[]={
0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b,
0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
0x0e, 0x36, 0x21,
};
static const int32_t from_iso_2022_cnOffs1 [] ={
0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1,
2, 2, 2,
};
static const UChar iso_2022_cn_inputText3[]={ 0x3000, 0x3712, 0x3001, };
static const uint8_t to_iso_2022_cn3[]={
0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
0x0e, 0x21, 0x22,
};
static const int32_t from_iso_2022_cnOffs3 [] ={
0,0,0,0,0,0,0,
1,1,1,1,1,1,1,
2,2,2
};
static const UChar iso_2022_cn_inputText2[]={ 0x0041, 0x3712, 0x4e00, };
static const uint8_t to_iso_2022_cn2[]={
0x41,
0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b,
};
static const int32_t from_iso_2022_cnOffs2 [] ={
0,
1,1,1,1,1,1,
2,2,2,2,2,2,2
};
static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
@ -2095,11 +2009,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
log_err("u-> euc-tw with subst with value did not match.\n");
if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0 ))
log_err("u-> iso_2022_jp with subst with value did not match.\n");
if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
@ -2252,18 +2161,6 @@ static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
log_err("u-> iso_2022_cn with subst with value did not match.\n");
if(!testConvertFromUnicode(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]),
to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn",
UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs1, NULL, 0 ))
log_err("u-> iso_2022_cn with subst with value did not match.\n");
if(!testConvertFromUnicode(iso_2022_cn_inputText2, sizeof(iso_2022_cn_inputText2)/sizeof(iso_2022_cn_inputText2[0]),
to_iso_2022_cn2, sizeof(to_iso_2022_cn2), "iso-2022-cn",
UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs2, NULL, 0 ))
log_err("u-> iso_2022_cn with subst with value did not match.\n");
if(!testConvertFromUnicode(iso_2022_cn_inputText3, sizeof(iso_2022_cn_inputText3)/sizeof(iso_2022_cn_inputText3[0]),
to_iso_2022_cn3, sizeof(to_iso_2022_cn3), "iso-2022-cn",
UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs3, NULL, 0 ))
log_err("u-> iso_2022_cn with subst with value did not match.\n");
if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn",
UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))

View File

@ -438,10 +438,6 @@ static void TestErrorBehaviour(){
static const uint8_t expected2[] = { 0x31,0x1A,0x32};
static const int32_t offsets2[] = { 0x00,0x01,0x02};
static const UChar sampleText3MBCS[] = { 0x3000, 0x0050, 0xdc01,0x3001};
static const uint8_t expected3MBCS[] = { 0x1B, 0x24, 0x42, 0x21, 0x21, 0x1B, 0x28, 0x42, 0x50, 0x1A, 0x1B, 0x24, 0x42, 0x21, 0x22,};
static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03,};
static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
@ -462,13 +458,6 @@ static void TestErrorBehaviour(){
expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR))
log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
expected3MBCS, sizeof(expected3MBCS), "iso-2022-jp", offsets3MBCS, TRUE, U_ZERO_ERROR))
log_err("u->iso-2022-jp [UCNV_MBCS] \n");
if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
expected3MBCS, sizeof(expected3MBCS), "iso-2022-jp", offsets3MBCS, FALSE, U_ZERO_ERROR))
log_err("u-> iso-2022-jp[UCNV_MBCS] \n");
if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR))
log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
@ -542,13 +531,6 @@ static void TestErrorBehaviour(){
static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A };
static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 };
static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01,0x4e00};
static const uint8_t expected4MBCS[] = { 0x1b, 0x24, 0x29, 0x43,
0x61,
0x0e, 0x6c, 0x69,
0x0f, 0x1a,
0x0e, 0x6c, 0x69,};
static const int32_t offsets4MBCS[] = { -1, -1, -1, -1, 0x00, 0x01 ,0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03 };
if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR))
log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
@ -572,13 +554,6 @@ static void TestErrorBehaviour(){
if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]),
expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR))
log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
expected4MBCS, sizeof(expected4MBCS), "iso-2022-kr", offsets4MBCS, TRUE, U_ZERO_ERROR))
log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]),
expected4MBCS, sizeof(expected4MBCS), "iso-2022-kr", offsets4MBCS, FALSE, U_ZERO_ERROR))
log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
}
/*HZ*/

View File

@ -3983,96 +3983,7 @@ static void TestJitterbug2411(){
static void
TestJIS(){
/* From Unicode */
{
/* JIS Encoding */
UChar sampleTextJIS[] ={
0xFF81, 0xFF82,
0x30EC, 0x30ED,
0x30EE, 0x30EF,
0xFF93, 0xFF94,
0xFF95, 0xFF96,
0xFF97, 0xFF98
};
const uint8_t expectedISO2022JIS[] ={
0x1b, 0x24, 0x42,
0x25, 0x41, 0x25, 0x44,
0x25, 0x6c, 0x25, 0x6d,
0x25, 0x6e, 0x25, 0x6F,
0x25, 0x62, 0x25, 0x64,
0x25, 0x66, 0x25, 0x68,
0x25, 0x69, 0x25, 0x6a
};
int32_t fmISO2022JISOffs[] ={
0,0,0,
0,0,1,1,
2,2,3,3,
4,4,5,5,
6,6,7,7,
8,8,9,9,
10,10,11,11
};
/* JIS7 Encoding */
const uint8_t expectedISO2022JIS7[] ={
0x0e,
0x41, 0x42,
0x0f, 0x1b, 0x24, 0x42,
0x25, 0x6c, 0x25, 0x6d,
0x25, 0x6e, 0x25, 0x6F,
0x0e,
0x53, 0x54,
0x55, 0x56,
0x57, 0x58
};
int32_t fmISO2022JIS7Offs[] ={
0,
0,1,
2,2,2,2,
2,2,3,3,
4,4,5,5,
6,
6,7,
8,9,
10,11
};
/* JIS8 Encoding */
const uint8_t expectedISO2022JIS8[] ={
0xC1, 0xC2,
0x1b, 0x24, 0x42,
0x25, 0x6c, 0x25, 0x6d,
0x25, 0x6e, 0x25, 0x6F,
0x1b, 0x28, 0x4A,
0xD3, 0xD4,
0xD5, 0xD6,
0xD7, 0xD8
};
int32_t fmISO2022JIS8Offs[] ={
0,1,
2,2,2,
2,2,3,3,
4,4,5,5,
6,6,6,
6,7,
8,9,
10,11
};
testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
expectedISO2022JIS, sizeof(expectedISO2022JIS), "JIS", fmISO2022JISOffs,TRUE );
testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
expectedISO2022JIS7, sizeof(expectedISO2022JIS7), "JIS7", fmISO2022JIS7Offs,FALSE );
testConvertFromU(sampleTextJIS, sizeof(sampleTextJIS)/sizeof(sampleTextJIS[0]),
expectedISO2022JIS8, sizeof(expectedISO2022JIS8), "JIS8", fmISO2022JIS8Offs,FALSE );
}
/* From Unicode moved to testdata/conversion.txt */
/*To Unicode*/
{
const uint8_t sampleTextJIS[] = {

View File

@ -420,6 +420,105 @@ conversion {
fromUnicode {
Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
Cases {
// moved from cintltst /tsconv/nccbtst/TestSkipCallBack
{
"iso-2022-jp",
"\u3000\xe9\u3001",
:bin{ 1b2442212121221b2842 },
:intvector{ 0,0,0,0,0,2,2,2,2,2 },
:int{1}, :int{1}, "", "0", ""
}
// moved from cintltst /tsconv/nccbtst/TestSubCallBack
{
"iso-2022-jp",
"A\xe9B\xe9\u3000",
:bin{ 411a421a1b244221211b2842 },
:intvector{ 0,1,2,3,4,4,4,4,4,4,4,4 },
:int{1}, :int{1}, "", "?", ""
}
// moved from cintltst /tsconv/nccbtst/TestSubWithValueCallBack
{
"iso-2022-jp",
"A\xe9B\xe9\u3000",
:bin{ 41255530304539422555303045391b244221211b2842 },
:intvector{ 0,1,1,1,1,1,1,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4 },
:int{1}, :int{1}, "", "&", ""
}
{
"iso-2022-cn",
"\u4e00\u3712\u4e01",
:bin{ 1b2429410e523b0f2555333731320e36210f },
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2 },
:int{1}, :int{1}, "", "&", ""
}
{
"iso-2022-cn",
"A\u3712\u4e00",
:bin{ 412555333731321b2429410e523b0f },
:intvector{ 0,1,1,1,1,1,1,2,2,2,2,2,2,2,2 },
:int{1}, :int{1}, "", "&", ""
}
{
"iso-2022-cn",
"\u3000\u3712\u3001",
:bin{ 1b2429410e21210f2555333731320e21220f },
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2 },
:int{1}, :int{1}, "", "&", ""
}
// moved from cintltst /tsconv/nucnvtst/TestJIS
{
"JIS",
"\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98",
:bin{ 1b244225412544256c256d256e256F25622564256625682569256a1b2842 },
:intvector{ 0,0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,11,11,11 },
:int{1}, :int{1}, "", "?", ""
}
{
"JIS7",
"\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98",
:bin{ 0e41420f1b2442256c256d256e256F0e5354555657580f1b2842 },
:intvector{ 0,0,1,2,2,2,2,2,2,3,3,4,4,5,5,6,6,7,8,9,10,11,11,11,11,11 },
:int{1}, :int{1}, "", "?", ""
}
{
"JIS8",
"\uFF81\uFF82\u30EC\u30ED\u30EE\u30EF\uFF93\uFF94\uFF95\uFF96\uFF97\uFF98",
:bin{ C1C21b2442256c256d256e256F1b284AD3D4D5D6D7D81b2842 },
:intvector{ 0,1,2,2,2,2,2,3,3,4,4,5,5,6,6,6,6,7,8,9,10,11,11,11,11 },
:int{1}, :int{1}, "", "?", ""
}
// moved from cintltst /tsconv/ncnvtst/TestErrorBehaviour
{
"iso-2022-jp",
"\u3000\x50\udc01\u3001",
:bin{ 1B244221211B2842501A1B24422122 },
:intvector{ 0,0,0,0,0,1,1,1,1,2,3,3,3,3,3 },
:int{0}, :int{1}, "", "?", "\udc01"
}
{
"iso-2022-jp",
"\u3000\x50\udc01\u3001",
:bin{ 1B244221211B2842501A1B244221221b2842 },
:intvector{ 0,0,0,0,0,1,1,1,1,2,3,3,3,3,3,3,3,3 },
:int{1}, :int{1}, "", "?", ""
}
{
"iso-2022-kr",
"\x61\u4e00\udc01\u4e00",
:bin{ 1b242943610e6c690f1a0e6c69 },
:intvector{ -1,-1,-1,-1,0,1,1,1,2,2,3,3,3 },
:int{0}, :int{1}, "", "?", "\udc01"
}
{
"iso-2022-kr",
"\x61\u4e00\udc01\u4e00",
:bin{ 1b242943610e6c690f1a0e6c690f },
:intvector{ -1,-1,-1,-1,0,1,1,1,2,2,3,3,3,3 },
:int{1}, :int{1}, "", "?", ""
}
// ISO-2022-KR
{
"ibm-25546",
@ -442,6 +541,13 @@ conversion {
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,6,6 },
:int{1}, :int{1}, "", "?", ""
}
{
"ibm-25546",
"AB\uc88b\U00050005\uaccc",
:bin{ 1b24294341420e41412f7e306a0f },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5 },
:int{1}, :int{1}, "", "?", ""
}
{
"ISO-2022-KR",
"AB\uc88b\U00050005\uacccC",
@ -463,13 +569,20 @@ conversion {
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,6,6 },
:int{1}, :int{1}, "", "?\x00\x2f\x7e", ""
}
{
"ISO-2022-KR",
"AB\uc88b\U00050005\uaccc",
:bin{ 1b24294341420e41412f7e306a0f },
:intvector{ -1,-1,-1,-1,0,1,2,2,2,3,3,5,5,5 },
:int{1}, :int{1}, "", "?\x00\x2f\x7e", ""
}
// ISO-2022-JP-2 with G2 designator & SS2 shift
{
"ISO-2022-JP-2",
"CF\u758f\u038f\u7591",
:bin{ 43461b244241411b2e461b4e3f353f },
:intvector{ 0,1,2,2,2,2,2,3,3,3,3,3,3,4,4 },
:bin{ 43461b244241411b2e461b4e3f353f1b2842 },
:intvector{ 0,1,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4 },
:int{1}, :int{1}, "", ".", ""
}
// JIS7 with Katakana
@ -480,12 +593,20 @@ conversion {
:intvector{ 0,1,2,2,3,4,4,5 },
:int{1}, :int{1}, "", ".", ""
}
// JIS7 with shift to ASCII at the very end
{
"JIS7",
"AB\uff81\uff82",
:bin{ 41420e41420f },
:intvector{ 0,1,2,2,3,3 },
:int{1}, :int{1}, "", ".", ""
}
// JIS8 with Katakana
{
"JIS8",
"A\uff81\\\xa5\uff82B",
:bin{ 41c15c1b284a5cc242 },
:intvector{ 0,1,2,3,3,3,3,4,5 },
:bin{ 41c15c1b284a5cc2421b2842 },
:intvector{ 0,1,2,3,3,3,3,4,5,5,5,5 },
:int{1}, :int{1}, "", ".", ""
}
@ -497,6 +618,22 @@ conversion {
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4,6,6 },
:int{1}, :int{1}, "", ".", ""
}
// ISO-2022-CN-EXT with shift to ASCII at the very end
{
"ISO-2022-CN-EXT",
"\u4eae\u9f82\u56cd\u56cc\U0002a6d6",
:bin{ 1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c0f },
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4 },
:int{1}, :int{1}, "", ".", ""
}
// ISO-2022-CN-EXT without flush so do not shift to ASCII at the very end
{
"ISO-2022-CN-EXT",
"\u4eae\u9f82\u56cd\u56cc\U0002a6d6",
:bin{ 1b2429410e41411b2429457e7c1b242a481b4e70341b2429477c341b242b4d1b4f664c },
:intvector{ 0,0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,4,4,4,4,4,4,4,4 },
:int{0}, :int{1}, "", ".", ""
}
// windows-936 vs. ibm-1386
{