ICU-2778 make illegal/truncated chars consistent

X-SVN-Rev: 11434
This commit is contained in:
Ram Viswanadha 2003-04-02 02:59:14 +00:00
parent 777eabd944
commit bb6aadbf80
2 changed files with 22 additions and 6 deletions

View File

@ -46,7 +46,7 @@
#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR 0x0931
#define LF 0x0A
#define ASCII_END 0x9f
#define ASCII_END 0xA0
#define NO_CHAR_MARKER 0xFFFE
#define TELUGU_DELTA DELTA * TELUGU
#define DEV_ABBR_SIGN 0x0970
@ -637,7 +637,7 @@ static const uint16_t toUnicodeTable[256]={
0x009d,/* 0x9d */
0x009e,/* 0x9e */
0x009f,/* 0x9f */
0x0900,/* 0xa0 */
0x00A0,/* 0xa0 */
0x0901,/* 0xa1 */
0x0902,/* 0xa2 */
0x0903,/* 0xa3 */
@ -1188,8 +1188,19 @@ UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
data->currentDeltaToUnicode = data->defDeltaToUnicode;
data->currentMaskToUnicode = data->defMaskToUnicode;
}else{
/* these are display codes; consume and continue */
if((sourceChar >= 0x21 && sourceChar <= 0x3F)){
/* these are display codes; consume and continue */
}else{
*err =U_ILLEGAL_CHAR_FOUND;
/* reset */
*contextCharToUnicode=NO_CHAR_MARKER;
goto CALLBACK;
}
}
/* reset */
*contextCharToUnicode=NO_CHAR_MARKER;
@ -1220,6 +1231,7 @@ UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
reason = UCNV_UNASSIGNED;
}else{
/* only 0xA1 - 0xEE are legal after EXT char */
*contextCharToUnicode= NO_CHAR_MARKER;
reason= UCNV_ILLEGAL;
*err = U_ILLEGAL_CHAR_FOUND;
}
@ -1381,9 +1393,9 @@ CALLBACK:
}
if((args->flush==TRUE)
&& (source == sourceLimit)
&& data->contextCharToUnicode !=0){
&& data->contextCharToUnicode != NO_CHAR_MARKER){
/* if ATR or EXT is still pending in context at the end of input, it is an error */
if(data->contextCharToUnicode==ATR || data->contextCharToUnicode==EXT){
if(data->contextCharToUnicode==ATR || data->contextCharToUnicode==EXT || *toUnicodeStatus == missingCharMarker){
*err = U_TRUNCATED_CHAR_FOUND;
}else{
WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),

View File

@ -2966,7 +2966,7 @@ TestISCII(){
0x095C,
0x095D,
0x095E,
0x0020, 0x094D, 0x0930
0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
};
static const unsigned char byteArr[]={
@ -3038,6 +3038,10 @@ TestISCII(){
0xC9, 0xE9,
/* INV halant RA */
0xD9, 0xE8, 0xCF,
0x00, 0x00A0,
/* just consume unhandled codepoints */
0xEF, 0x30,
};
testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));