ICU-3446 fix use of .cnv unicodeMask - only for base table

X-SVN-Rev: 14137
This commit is contained in:
Markus Scherer 2003-12-15 00:40:24 +00:00
parent a6ebd5da94
commit 1cc2c34850
5 changed files with 171 additions and 147 deletions

View File

@ -960,13 +960,14 @@ MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
int32_t *length,
int outputType)
{
const uint16_t *table=sharedData->mbcs.fromUnicodeTable;
const int32_t *cx;
const uint16_t *table;
uint32_t stage2Entry;
uint32_t myValue=0;
uint32_t myValue;
const uint8_t *p;
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
table=sharedData->mbcs.fromUnicodeTable;
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
/* get the bytes and the length for the output */
if(outputType==MBCS_OUTPUT_2){
@ -976,7 +977,7 @@ MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
} else {
*length=2;
}
}else if(outputType==MBCS_OUTPUT_3){
} else /* outputType==MBCS_OUTPUT_3 */ {
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
if(myValue<=0xff) {
@ -998,18 +999,18 @@ MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,
*/
/* assigned */
*value=myValue;
} else {
const int32_t *cx=sharedData->mbcs.extIndexes;
if(cx!=NULL) {
*length=ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
} else {
/* unassigned */
*length=0;
}
return;
}
}else{
*length=0;
}
cx=sharedData->mbcs.extIndexes;
if(cx!=NULL) {
*length=ucnv_extSimpleMatchFromU(cx, c, value, useFallback);
return;
}
/* unassigned */
*length=0;
}
/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c

View File

@ -136,7 +136,12 @@
* 15.. 8 maximum output UChars
* 7.. 0 maximum UChars per byte
*
* [19]..[30] reserved, 0
* [19] Bit field containing flags:
* (extension table unicodeMask)
* 1 UCNV_HAS_SURROGATES flag for the extension table
* 0 UCNV_HAS_SUPPLEMENTARY flag for the extension table
*
* [20]..[30] reserved, 0
* [31] number of bytes for the entire extension structure
* [>31] reserved; there are indexes[0] indexes
*
@ -317,8 +322,9 @@ enum {
UCNV_EXT_COUNT_BYTES, /* 17 */
UCNV_EXT_COUNT_UCHARS,
UCNV_EXT_FLAGS,
UCNV_EXT_RESERVED_INDEX, /* 19, moves with additional indexes */
UCNV_EXT_RESERVED_INDEX, /* 20, moves with additional indexes */
UCNV_EXT_SIZE=31,
UCNV_EXT_INDEXES_MIN_LENGTH=32

View File

@ -1108,7 +1108,16 @@ _MBCSLoad(UConverterSharedData *sharedData,
mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
}
}
/*
* Unlike the branch below for files with base tables, do not get the
* unicodeMask from the sharedData's static data here; instead, use the
* base table's unicodeMask, which we copied in the memcpy above.
* This is necessary because the static data unicodeMask, in particular
* the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data and
* describes only the base table.
} else {
/* conversion file with a base table; an additional extension table is optional */
/* make sure that the output type is known */
switch(mbcsTable->outputType) {
case MBCS_OUTPUT_1:
@ -1134,20 +1143,20 @@ _MBCSLoad(UConverterSharedData *sharedData,
mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
mbcsTable->fromUBytesLength=header->fromUBytesLength;
}
/*
* converter versions 6.1 and up contain a unicodeMask that is
* used here to select the most efficient function implementations
*/
info.size=sizeof(UDataInfo);
udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
/* mask off possible future extensions to be safe */
mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
} else {
/* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;
/*
* converter versions 6.1 and up contain a unicodeMask that is
* used here to select the most efficient function implementations
*/
info.size=sizeof(UDataInfo);
udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
/* mask off possible future extensions to be safe */
mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
} else {
/* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;
}
}
}
@ -3701,134 +3710,133 @@ U_CFUNC int32_t
_MBCSFromUChar32(UConverterSharedData *sharedData,
UChar32 c, uint32_t *pValue,
UBool useFallback) {
const uint16_t *table=sharedData->mbcs.fromUnicodeTable;
const int32_t *cx;
const uint16_t *table;
const uint8_t *p;
uint32_t stage2Entry;
uint32_t value;
int32_t length;
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
return 0;
}
if(c<=0xffff || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {
table=sharedData->mbcs.fromUnicodeTable;
/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
/* is this code point assigned, or do we use fallbacks? */
if(useFallback ? value>=0x800 : value>=0xc00) {
*pValue=value&0xff;
return 1;
} else {
return 0;
/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */
if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) {
value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);
/* is this code point assigned, or do we use fallbacks? */
if(useFallback ? value>=0x800 : value>=0xc00) {
*pValue=value&0xff;
return 1;
}
} else /* outputType!=MBCS_OUTPUT_1 */ {
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
/* get the bytes and the length for the output */
switch(sharedData->mbcs.outputType) {
case MBCS_OUTPUT_2:
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(value<=0xff) {
length=1;
} else {
length=2;
}
break;
case MBCS_OUTPUT_DBCS_ONLY:
/* table with single-byte results, but only DBCS mappings used */
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(value<=0xff) {
/* no mapping or SBCS result, not taken for DBCS-only */
value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
length=0;
} else {
length=2;
}
break;
case MBCS_OUTPUT_3:
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
if(value<=0xff) {
length=1;
} else if(value<=0xffff) {
length=2;
} else {
length=3;
}
break;
case MBCS_OUTPUT_4:
value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(value<=0xff) {
length=1;
} else if(value<=0xffff) {
length=2;
} else if(value<=0xffffff) {
length=3;
} else {
length=4;
}
break;
case MBCS_OUTPUT_3_EUC:
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
/* EUC 16-bit fixed-length representation */
if(value<=0xff) {
length=1;
} else if((value&0x8000)==0) {
value|=0x8e8000;
length=3;
} else if((value&0x80)==0) {
value|=0x8f0080;
length=3;
} else {
length=2;
}
break;
case MBCS_OUTPUT_4_EUC:
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
/* EUC 16-bit fixed-length representation applied to the first two bytes */
if(value<=0xff) {
length=1;
} else if(value<=0xffff) {
length=2;
} else if((value&0x800000)==0) {
value|=0x8e800000;
length=4;
} else if((value&0x8000)==0) {
value|=0x8f008000;
length=4;
} else {
length=3;
}
break;
default:
/* must not occur */
return -1;
}
/* is this code point assigned, or do we use fallbacks? */
if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
(FROM_U_USE_FALLBACK(useFallback, c) && value!=0)
) {
/*
* We allow a 0 byte output if the "assigned" bit is set for this entry.
* There is no way with this data structure for fallback output
* to be a zero byte.
*/
/* assigned */
*pValue=value;
return length;
}
}
}
stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
/* get the bytes and the length for the output */
switch(sharedData->mbcs.outputType) {
case MBCS_OUTPUT_2:
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(value<=0xff) {
length=1;
} else {
length=2;
}
break;
case MBCS_OUTPUT_DBCS_ONLY:
/* table with single-byte results, but only DBCS mappings used */
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(value<=0xff) {
/* no mapping or SBCS result, not taken for DBCS-only */
value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */
length=0;
} else {
length=2;
}
break;
case MBCS_OUTPUT_3:
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
if(value<=0xff) {
length=1;
} else if(value<=0xffff) {
length=2;
} else {
length=3;
}
break;
case MBCS_OUTPUT_4:
value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
if(value<=0xff) {
length=1;
} else if(value<=0xffff) {
length=2;
} else if(value<=0xffffff) {
length=3;
} else {
length=4;
}
break;
case MBCS_OUTPUT_3_EUC:
value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
/* EUC 16-bit fixed-length representation */
if(value<=0xff) {
length=1;
} else if((value&0x8000)==0) {
value|=0x8e8000;
length=3;
} else if((value&0x80)==0) {
value|=0x8f0080;
length=3;
} else {
length=2;
}
break;
case MBCS_OUTPUT_4_EUC:
p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);
value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];
/* EUC 16-bit fixed-length representation applied to the first two bytes */
if(value<=0xff) {
length=1;
} else if(value<=0xffff) {
length=2;
} else if((value&0x800000)==0) {
value|=0x8e800000;
length=4;
} else if((value&0x8000)==0) {
value|=0x8f008000;
length=4;
} else {
length=3;
}
break;
default:
/* must not occur */
return -1;
cx=sharedData->mbcs.extIndexes;
if(cx!=NULL) {
return ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
}
/* is this code point assigned, or do we use fallbacks? */
if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
(FROM_U_USE_FALLBACK(useFallback, c) && value!=0)
) {
/*
* We allow a 0 byte output if the "assigned" bit is set for this entry.
* There is no way with this data structure for fallback output
* to be a zero byte.
*/
/* assigned */
*pValue=value;
return length;
} else {
const int32_t *cx=sharedData->mbcs.extIndexes;
if(cx!=NULL) {
return ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback);
}
/* unassigned */
return 0;
}
/* unassigned */
return 0;
}

View File

@ -42,6 +42,14 @@
* If it is present, then an ICU version reading header versions 4.0 or 4.1
* will be able to use the base table and ignore the extension.
*
* The unicodeMask in the static data is part of the base table data structure.
* In particular, the UCNV_HAS_SUPPLEMENTARY flag determines the length of the
* fromUnicode stage 1 array.
* The static data unicodeMask refers only to the base table's properties if
* a base table is included.
* In an extension-only file, the static data unicodeMask is 0.
* The extension data indexes have a separate field with the unicodeMask flags.
*
* MBCS-style data structure following the static data.
* Offsets are counted in bytes from the beginning of the MBCS header structure.
* Details about usage in comments in ucnvmbcs.c.

View File

@ -214,6 +214,8 @@ CnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData,
(extData->maxOutUChars<<8)|
extData->maxUCharsPerByte;
indexes[UCNV_EXT_FLAGS]=extData->ucm->ext->unicodeMask;
/* write the extension data */
udata_writeBlock(pData, indexes, sizeof(indexes));
udata_writeBlock(pData, utm_getStart(extData->toUTable), indexes[UCNV_EXT_TO_U_LENGTH]*4);
@ -1035,8 +1037,7 @@ static UBool
CnvExtAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData) {
CnvExtData *extData;
staticData->unicodeMask=table->unicodeMask;
if(staticData->unicodeMask&UCNV_HAS_SURROGATES) {
if(table->unicodeMask&UCNV_HAS_SURROGATES) {
fprintf(stderr, "error: contains mappings for surrogate code points\n");
return FALSE;
}