ICU-5797 use Shift-JIS table for ISO 2022-JP, and hardcode JIS X 0201 mappings

X-SVN-Rev: 22772
This commit is contained in:
Markus Scherer 2007-10-11 21:31:32 +00:00
parent e6ca6a5162
commit cc36611b2f
4 changed files with 399 additions and 101 deletions

View File

@ -472,8 +472,7 @@ _ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t opti
if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
myConverterData->myConverterArray[ISO8859_7]= ucnv_loadSharedData("ISO8859_7", NULL, errorCode);
}
myConverterData->myConverterArray[JISX201] = ucnv_loadSharedData("JISX0201", NULL, errorCode);
myConverterData->myConverterArray[JISX208] = ucnv_loadSharedData("jisx-208", NULL, errorCode);
myConverterData->myConverterArray[JISX208] = ucnv_loadSharedData("Shift-JIS", NULL, errorCode);
if(jpCharsetMasks[version]&CSM(JISX212)) {
myConverterData->myConverterArray[JISX212] = ucnv_loadSharedData("jisx-212", NULL, errorCode);
}
@ -1040,14 +1039,6 @@ MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
length=3;
}
}
/*
* TODO(markus): Use Shift-JIS table for JIS X 0208, to save mapping table space.
* Pass in parameter for type of output bytes, for validation and shifting:
* - Direct: Pass bytes through, but forbid control codes 00-1F (except SI/SO/ESC) and space 20?
* (Need to allow some (TAB/LF/CR) or most of them for ASCII and maybe JIS X 0201.)
* - A1-FE: Subtract 80 after range check.
* - SJIS: Shift DBCS result to 21-7E x 21-7E.
*/
/* is this code point assigned, or do we use fallbacks? */
if((stage2Entry&(1<<(16+(c&0xf))))!=0) {
/* assigned */
@ -1105,6 +1096,23 @@ MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,
}
}
/*
 * Convert a strict EUC DBCS byte pair (each byte in A1..FE) to the
 * corresponding ISO 2022 byte pair (each byte in 21..7E) by subtracting
 * 0x80 from each byte.
 * Only the low 16 bits of value are range-checked.
 * Return 0 if either of the two low bytes is outside A1..FE.
 */
static U_INLINE uint32_t
_2022FromGR94DBCS(uint32_t value) {
    /* distance of the byte pair from A1A1, and of the trail byte from A1 */
    uint16_t pairOffset = (uint16_t)(value - 0xa1a1);
    uint8_t trailOffset = (uint8_t)(value - 0xa1);

    if(pairOffset > (0xfefe - 0xa1a1)) {
        return 0;  /* lead byte (or the pair as a whole) out of range */
    }
    if(trailOffset > (0xfe - 0xa1)) {
        return 0;  /* trail byte out of range */
    }
    return value - 0x8080;  /* move both bytes down to 21..7e */
}
#ifdef U_ENABLE_GENERIC_ISO_2022
/**********************************************************************************
@ -1233,7 +1241,7 @@ toUnicodeCallback(UConverter *cnv,
}
else{
cnv->toUBytes[0] =(char) sourceChar;
cnv->toULength = 2;
cnv->toULength = 1;
}
if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
@ -1344,6 +1352,181 @@ static const int32_t escSeqCharsLen[] ={
* TODO: Implement a priority technique where the users are allowed to set the priority of code pages
*/
/*
 * Map a JIS X 0201 byte 00..7F to Unicode.
 * Only two bytes differ from ASCII: 5C maps to U+00A5 and 7E maps to U+203E.
 * Every other value is returned unchanged.
 */
static U_INLINE uint32_t
jisx201ToU(uint32_t value) {
    switch(value) {
    case 0x5c:
        return 0xa5;
    case 0x7e:
        return 0x203e;
    default:
        return value;
    }
}
/*
 * Map a Unicode code point to a JIS X 0201 byte 00..7F.
 * U+00A5 maps to 5C and U+203E maps to 7E; U+005C and U+007E themselves
 * are unmappable. All other code points up to U+007F map to themselves.
 * Return U+FFFE if unmappable.
 */
static U_INLINE uint32_t
jisx201FromU(uint32_t value) {
    switch(value) {
    case 0xa5:
        return 0x5c;
    case 0x203e:
        return 0x7e;
    case 0x5c:
    case 0x7e:
        /* these byte values are taken by U+00A5 and U+203E */
        return 0xfffe;
    default:
        return (value <= 0x7f) ? value : 0xfffe;
    }
}
/*
 * Convert a valid Shift-JIS byte pair to the JIS X 0208 pair of 21..7E bytes
 * used in ISO 2022.
 * Values above EFFC are beyond the Shift-JIS range that corresponds to
 * JIS X 0208; return 0 for them.
 * The input is assumed to be a valid Shift-JIS double-byte code; no other
 * validation is done here.
 */
static U_INLINE uint32_t
_2022FromSJIS(uint32_t value) {
    uint32_t lead;
    uint8_t trail;

    if(value > 0xEFFC) {
        return 0;  /* beyond JIS X 0208 */
    }

    trail = (uint8_t)value;
    lead = value & 0xff00;

    /*
     * Lead bytes come in two ranges, up to 9F and (expected) E0..EF.
     * Rebase each range and double it:
     * every Shift-JIS lead byte covers two ISO 2022 lead bytes.
     */
    lead -= (lead <= 0x9f00) ? 0x7000 : 0xb000;
    lead <<= 1;

    if(trail > 0x9e) {
        /* upper half of the trail bytes (up to FC): second lead byte of the pair */
        return lead | (trail - 0x7e);
    }

    /* lower half of the trail bytes: first lead byte of the pair */
    lead -= 0x100;
    /* skip over the unused trail byte 7F */
    return lead | (trail - ((trail <= 0x7e) ? 0x1f : 0x20));
}
/*
 * Convert a pair of JIS X 0208 21..7E bytes (c1=lead, c2=trail) to the
 * corresponding Shift-JIS byte pair, written to bytes[0] and bytes[1].
 * If a byte is outside 21..7E then the output must not be valid Shift-JIS,
 * so that the Shift-JIS converter rejects it:
 * out-of-range values are replaced with 0 where the arithmetic
 * would not already yield an invalid Shift-JIS byte.
 */
static U_INLINE void
_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) {
    uint8_t lead, trail;

    if((c1 & 1) == 0) {
        /* even lead byte: trail goes to the upper Shift-JIS trail range */
        lead = c1;
        if((uint8_t)(c2 - 0x21) <= (0x7e - 0x21)) {
            trail = (uint8_t)(c2 + 0x7e);
        } else {
            trail = 0;  /* invalid */
        }
    } else {
        /* odd lead byte: round up so that the halving below pairs it with the next even one */
        lead = (uint8_t)(c1 + 1);
        if(c2 > 0x7e) {
            trail = 0;  /* invalid */
        } else if(c2 > 0x5f) {
            trail = (uint8_t)(c2 + 0x20);  /* skips Shift-JIS trail byte 7F */
        } else {
            trail = (uint8_t)(c2 + 0x1f);
        }
    }

    /* two ISO 2022 lead bytes share one Shift-JIS lead byte */
    lead >>= 1;
    if(lead > 0x3f) {
        lead = 0;  /* invalid */
    } else if(lead > 0x2f) {
        lead += 0xb0;
    } else {
        lead += 0x70;
    }

    bytes[0] = (char)lead;
    bytes[1] = (char)trail;
}
/*
 * Fallback table from Unicode half-width Katakana to JIS X 0208 full-width
 * (DBCS) Katakana.
 *
 * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS)
 * Katakana.
 * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks
 * because Shift-JIS roundtrips half-width Katakana to single bytes.
 * These were the only fallbacks in ICU's jisx-208.ucm file.
 *
 * The table is indexed by (code point - HWKANA_START); the first entry is for
 * U+FF61 and the last one for U+FF9F, with a marker comment every 16 code points.
 * Each entry is a double-byte value that is used directly as the output value
 * for the JIS X 0208 charset, without going through the Shift-JIS conversion.
 */
static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = {
0x2123, /* U+FF61 */
0x2156,
0x2157,
0x2122,
0x2126,
0x2572,
0x2521,
0x2523,
0x2525,
0x2527,
0x2529,
0x2563,
0x2565,
0x2567,
0x2543,
0x213C, /* U+FF70 */
0x2522,
0x2524,
0x2526,
0x2528,
0x252A,
0x252B,
0x252D,
0x252F,
0x2531,
0x2533,
0x2535,
0x2537,
0x2539,
0x253B,
0x253D,
0x253F, /* U+FF80 */
0x2541,
0x2544,
0x2546,
0x2548,
0x254A,
0x254B,
0x254C,
0x254D,
0x254E,
0x254F,
0x2552,
0x2555,
0x2558,
0x255B,
0x255E,
0x255F, /* U+FF90 */
0x2560,
0x2561,
0x2562,
0x2564,
0x2566,
0x2568,
0x2569,
0x256A,
0x256B,
0x256C,
0x256D,
0x256F,
0x2573,
0x212B,
0x212C /* U+FF9F */
};
static void
UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {
UConverter *cnv = args->converter;
@ -1499,7 +1682,7 @@ getTrail:
}
break;
case HWKANA_7BIT:
if((uint32_t)(HWKANA_END-sourceChar)<=(HWKANA_END-HWKANA_START)) {
if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
if(converterData->version==3) {
/* JIS7: use G1 (SO) */
/* Shift U+FF61..U+FF9F to bytes 21..5F. */
@ -1526,13 +1709,34 @@ getTrail:
break;
case JISX201:
/* G0 SBCS */
len2 = MBCS_SINGLE_FROM_UCHAR32(
value = jisx201FromU(sourceChar);
if(value <= 0x7f) {
targetValue = value;
len = 1;
cs = cs0;
g = 0;
useFallback = FALSE;
}
break;
case JISX208:
/* G0 DBCS from Shift-JIS table */
len2 = MBCS_FROM_UCHAR32_ISO2022(
converterData->myConverterArray[cs0],
sourceChar, &value,
useFallback);
if(len2 != 0 && !(len2 < 0 && len != 0) && value <= 0x7f) {
targetValue = value;
len = len2;
useFallback, MBCS_OUTPUT_2);
if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
value = _2022FromSJIS(value);
if(value != 0) {
targetValue = value;
len = len2;
cs = cs0;
g = 0;
useFallback = FALSE;
}
} else if(len == 0 && useFallback &&
(uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
targetValue = hwkana_fb[sourceChar - HWKANA_START];
len = -2;
cs = cs0;
g = 0;
useFallback = FALSE;
@ -1564,17 +1768,10 @@ getTrail:
* Check for valid bytes for the encoding scheme.
* This is necessary because the sub-converter (windows-949)
* has a broader encoding scheme than is valid for 2022.
*
* Check that the result is a 2-byte value with each byte in the range A1..FE
* (strict EUC-KR DBCS) before accepting it and subtracting 0x80 from each byte
* to move it to the ISO 2022 range 21..7E.
*/
if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
(uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
) {
value -= 0x8080; /* shift down to 21..7e byte range */
} else {
break; /* not valid for ISO 2022 */
value = _2022FromGR94DBCS(value);
if(value == 0) {
break;
}
}
targetValue = value;
@ -1750,7 +1947,7 @@ getTrail:
static void
UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
UErrorCode* err){
char tempBuf[3];
char tempBuf[2];
const char *mySource = (char *) args->source;
UChar *myTarget = args->target;
const char *mySourceLimit = args->sourceLimit;
@ -1868,10 +2065,7 @@ escape:
break;
case JISX201:
if(mySourceChar <= 0x7f) {
targetUniChar =
_MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(
myData->myConverterArray[cs],
mySourceChar);
targetUniChar = jisx201ToU(mySourceChar);
}
break;
case HWKANA_7BIT:
@ -1885,8 +2079,13 @@ escape:
if(mySource < mySourceLimit) {
char trailByte;
getTrailByte:
tempBuf[0] = (char) (mySourceChar);
tempBuf[1] = trailByte = *mySource++;
trailByte = *mySource++;
if(cs == JISX208) {
_2022ToSJIS((uint8_t)mySourceChar, (uint8_t)trailByte, tempBuf);
} else {
tempBuf[0] = (char)mySourceChar;
tempBuf[1] = trailByte;
}
mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
} else {
@ -3190,6 +3389,9 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
/* open a set and initialize it with code points that are algorithmically round-tripped */
switch(cnvData->locale[0]){
case 'j':
/* include JIS X 0201 which is hardcoded */
sa->add(sa->set, 0xa5);
sa->add(sa->set, 0x203e);
if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
/* include Latin-1 for some variants of JP */
sa->addRange(sa->set, 0, 0xff);
@ -3198,6 +3400,11 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
sa->addRange(sa->set, 0, 0x7f);
}
if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {
/*
* TODO(markus): If and when ucnv_getUnicodeSet() supports fallbacks,
* we need to include half-width Katakana for all JP variants because
* JIS X 0208 has hardcoded fallbacks for them.
*/
/* include half-width Katakana for JP */
sa->addRange(sa->set, HWKANA_START, HWKANA_END);
}
@ -3217,15 +3424,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
break;
}
/*
* Version-specific for CN:
* CN version 0 does not map CNS planes 3..7 although
* they are all available in the CNS conversion table;
* CN version 1 does map them all.
* The two versions create different Unicode sets.
*/
for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
if(cnvData->myConverterArray[i]!=NULL) {
#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
cnvData->version==0 && i==CNS_11643
) {
@ -3235,9 +3434,33 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
sa, UCNV_ROUNDTRIP_SET,
0, 0x81, 0x82,
pErrorCode);
} else {
ucnv_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode);
}
#endif
for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
UConverterSetFilter filter;
if(cnvData->myConverterArray[i]!=NULL) {
if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
cnvData->version==0 && i==CNS_11643
) {
/*
* Version-specific for CN:
* CN version 0 does not map CNS planes 3..7 although
* they are all available in the CNS conversion table;
* CN version 1 (-EXT) does map them all.
* The two versions create different Unicode sets.
*/
filter=UCNV_SET_FILTER_2022_CN;
} else if(cnvData->locale[0]=='j' && i==JISX208) {
/*
* Only add code points that map to Shift-JIS codes
* corresponding to JIS X 0208.
*/
filter=UCNV_SET_FILTER_SJIS;
} else {
filter=UCNV_SET_FILTER_NONE;
}
ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode);
}
}

View File

@ -362,6 +362,8 @@ gb18030Ranges[13][4]={
/* Miscellaneous ------------------------------------------------------------ */
#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
/* similar to ucnv_MBCSGetNextUChar() but recursive */
static void
_getUnicodeSetForBytes(const UConverterSharedData *sharedData,
@ -454,11 +456,14 @@ ucnv_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
pErrorCode);
}
#endif
U_CFUNC void
ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
const USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode) {
ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
const USetAdder *sa,
UConverterUnicodeSet which,
UConverterSetFilter filter,
UErrorCode *pErrorCode) {
const UConverterMBCSTable *mbcsTable;
const uint16_t *table;
@ -512,12 +517,26 @@ ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
c+=1024; /* empty stage 2 block */
}
}
} else if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY) {
/* ignore single-byte results */
} else {
const uint32_t *stage2;
const uint16_t *stage3, *results;
const uint8_t *stage3, *bytes;
uint32_t st3Multiplier;
uint32_t value;
results=(const uint16_t *)mbcsTable->fromUnicodeBytes;
bytes=mbcsTable->fromUnicodeBytes;
switch(mbcsTable->outputType) {
case MBCS_OUTPUT_3:
case MBCS_OUTPUT_4_EUC:
st3Multiplier=3;
break;
case MBCS_OUTPUT_4:
st3Multiplier=4;
break;
default:
st3Multiplier=2;
break;
}
for(st1=0; st1<maxStage1; ++st1) {
st2=table[st1];
@ -526,7 +545,7 @@ ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
for(st2=0; st2<64; ++st2) {
if((st3=stage2[st2])!=0) {
/* read the stage 3 block */
stage3=results+16*(uint32_t)(uint16_t)st3;
stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3;
/* get the roundtrip flags for the stage 3 block */
st3>>=16;
@ -536,48 +555,50 @@ ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
* Once we get a set for fallback mappings, we have to check
* non-roundtrip stage 3 results for whether they are 0.
* See ucnv_MBCSFromUnicodeWithOffsets() for details.
*
* Ignore single-byte results (<0x100).
*/
do {
if((st3&1)!=0 && *stage3>=0x100) {
sa->add(sa->set, c);
}
st3>>=1;
++stage3;
} while((++c&0xf)!=0);
} else {
c+=16; /* empty stage 3 block */
}
}
} else {
c+=1024; /* empty stage 2 block */
}
}
} else {
const uint32_t *stage2;
for(st1=0; st1<maxStage1; ++st1) {
st2=table[st1];
if(st2>(maxStage1>>1)) {
stage2=(const uint32_t *)table+st2;
for(st2=0; st2<64; ++st2) {
if((st3=stage2[st2])!=0) {
/* get the roundtrip flags for the stage 3 block */
st3>>=16;
/*
* Add code points for which the roundtrip flag is set.
* Once we get a set for fallback mappings, we have to check
* non-roundtrip stage 3 results for whether they are 0.
* See ucnv_MBCSFromUnicodeWithOffsets() for details.
*/
do {
if(st3&1) {
sa->add(sa->set, c);
}
st3>>=1;
} while((++c&0xf)!=0);
switch(filter) {
case UCNV_SET_FILTER_NONE:
do {
if(st3&1) {
sa->add(sa->set, c);
}
st3>>=1;
} while((++c&0xf)!=0);
break;
case UCNV_SET_FILTER_DBCS_ONLY:
/* Ignore single-byte results (<0x100). */
do {
if((st3&1)!=0 && *((const uint16_t *)stage3)>=0x100) {
sa->add(sa->set, c);
}
st3>>=1;
stage3+=2; /* +=st3Multiplier */
} while((++c&0xf)!=0);
break;
case UCNV_SET_FILTER_2022_CN:
/* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
do {
if((st3&1)!=0 && ((value=*stage3)==0x81 || value==0x82)) {
sa->add(sa->set, c);
}
st3>>=1;
stage3+=3; /* +=st3Multiplier */
} while((++c&0xf)!=0);
break;
case UCNV_SET_FILTER_SJIS:
/* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
do {
if((st3&1)!=0 && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
sa->add(sa->set, c);
}
st3>>=1;
stage3+=2; /* +=st3Multiplier */
} while((++c&0xf)!=0);
break;
default:
*pErrorCode=U_INTERNAL_PROGRAM_ERROR;
return;
}
} else {
c+=16; /* empty stage 3 block */
}
@ -591,6 +612,19 @@ ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
ucnv_extGetUnicodeSet(sharedData, sa, which, pErrorCode);
}
/*
 * Convenience wrapper around ucnv_MBCSGetFilteredUnicodeSetForUnicode()
 * which picks the filter from the table's output type:
 * DBCS-only tables get the DBCS-only filter, all others are not filtered.
 */
U_CFUNC void
ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
                                 const USetAdder *sa,
                                 UConverterUnicodeSet which,
                                 UErrorCode *pErrorCode) {
    UConverterSetFilter filter;

    if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
        filter=UCNV_SET_FILTER_DBCS_ONLY;
    } else {
        filter=UCNV_SET_FILTER_NONE;
    }
    ucnv_MBCSGetFilteredUnicodeSetForUnicode(sharedData, sa, which, filter, pErrorCode);
}
static void
ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
const USetAdder *sa,

View File

@ -456,6 +456,7 @@ U_CFUNC void
ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
UErrorCode *pErrorCode);
#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */
/*
* Internal function returning a UnicodeSet for toUnicode() conversion.
* Currently only used for ISO-2022-CN, and only handles roundtrip mappings.
@ -470,6 +471,7 @@ ucnv_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
UConverterUnicodeSet which,
uint8_t state, int32_t lowByte, int32_t highByte,
UErrorCode *pErrorCode);
#endif
/*
* Internal function returning a UnicodeSet for toUnicode() conversion.
@ -481,9 +483,30 @@ ucnv_MBCSGetUnicodeSetForBytes(const UConverterSharedData *sharedData,
*/
U_CFUNC void
ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
const USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode);
const USetAdder *sa,
UConverterUnicodeSet which,
UErrorCode *pErrorCode);
/*
 * Selects which roundtrip mappings of an MBCS table
 * ucnv_MBCSGetFilteredUnicodeSetForUnicode() adds to the set.
 */
typedef enum UConverterSetFilter {
/* No filtering: add every code point whose roundtrip flag is set. */
UCNV_SET_FILTER_NONE,
/* Ignore single-byte results (<0x100). */
UCNV_SET_FILTER_DBCS_ONLY,
/* Only code points that map to CNS 11643 planes 1 & 2 (first result byte 0x81 or 0x82), for non-EXT ISO-2022-CN. */
UCNV_SET_FILTER_2022_CN,
/* Only code points that map to Shift-JIS codes corresponding to JIS X 0208 (0x8140..0xeffc). */
UCNV_SET_FILTER_SJIS,
/* Number of filter values above; not itself handled as a filter. */
UCNV_SET_FILTER_COUNT
} UConverterSetFilter;
/*
 * Same as ucnv_MBCSGetUnicodeSetForUnicode() but
 * the set can be filtered by encoding scheme.
 * Used by stateful converters which share regular conversion tables
 * but only use a subset of their mappings.
 *
 * sharedData: the shared conversion table whose mappings are enumerated
 * sa:         receives the selected code points via its add function
 * which:      which kind of set to build (same meaning as for
 *             ucnv_MBCSGetUnicodeSetForUnicode())
 * filter:     UConverterSetFilter value selecting the subset of mappings to add
 * pErrorCode: in/out ICU error code; an unrecognized filter value
 *             sets U_INTERNAL_PROGRAM_ERROR
 */
U_CFUNC void
ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
const USetAdder *sa,
UConverterUnicodeSet which,
UConverterSetFilter filter,
UErrorCode *pErrorCode);
#endif

View File

@ -48,6 +48,15 @@ conversion:table(nofallback) {
toUnicode {
Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
Cases {
// improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
// using the Shift-JIS table for JIS X 0208 (ticket #5797)
{
"ISO-2022-JP",
:bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 },
"}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e",
:intvector{ 3,4,5,9,11,13,15,17,19,21,23,25,27 },
:int{1}, :int{1}, "", "?", :bin{""}
}
// improve coverage of unrolled loops in ucnvmbcs.c/ucnv_MBCSSingleToBMPWithOffsets()
{
"ISO-8859-3",
@ -495,6 +504,15 @@ conversion:table(nofallback) {
fromUnicode {
Headers { "charset", "unicode", "bytes", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidUChars" }
Cases {
// improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
// using the Shift-JIS table for JIS X 0208 (ticket #5797)
{
"ISO-2022-JP",
"\u203e\xa5\u4e00\ufa10\u6f3e\u0391",
:bin{ 1b284a7e5c1b2442306c222e5f2126211b2842 },
:intvector{ 0,0,0,0,1,2,2,2,2,2,3,3,4,4,5,5,5,5,5 },
:int{1}, :int{0}, "", "?=\u3013", "" // U+3013 Geta Mark converts to 222e
}
// Verify that mappings that would result in byte values outside 20..7F (for SBCS)
// or 21..7E (for DBCS) are not used.
// ibm-9005_X110-2007.ucm (ISO 8859-7, <ESC>.F=1b2e46):
@ -1293,13 +1311,13 @@ conversion:table(nofallback) {
// versions of ISO-2022-JP
{
"ISO-2022-JP",
"[\x00-\x0d\x10-\x1a\x1c-\x7f\u0391-\u03a1\uff61-\uff9f\u4e00\u4e01\uffe5]",
"[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\uffe6-\U0010ffff]",
"[\x00-\x0d\x10-\x1a\x1c-\x7f\xa5\u0391-\u03a1\u203e\uff61-\uff9f\u4e00\u4e01\uffe5]",
"[\x0e\x0f\x1b\u0100-\u0113\u0385-\u038a\u4e02\u4e27-\u4e29\ufa0e-\ufa2d\uffe6-\U0010ffff]",
:int{0}
}
{
"ISO-2022-JP-2",
"[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\uff61-\uff9f\u4e00-\u4e05\uffe6]",
"[\x00-\x0d\x10-\x1a\x1c-\u0113\u0384-\u0386\u0388-\u038a\u0390-\u03a1\u203e\uff61-\uff9f\u4e00-\u4e05\uffe6]",
"[\x0e\x0f\x1b\uffe7-\U0010ffff]",
:int{0}
}