ICU-389 Added getTargetUniCharFromUnicodeMBCS() Function.

X-SVN-Rev: 2355
This commit is contained in:
Ram Viswanadha 2000-08-25 23:14:10 +00:00
parent 8c9b4ae99c
commit e20ad2971f

View File

@ -86,17 +86,54 @@ UConverterDataISO2022;
/* ISO-2022 ----------------------------------------------------------------- */
/*Forward declaration */
U_CFUNC void T_UConverter_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
UErrorCode * err);
U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * converter,
U_CFUNC void T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
UErrorCode * err);
/* Protos */
U_CFUNC void T_UConverter_fromUnicode_ISO_2022(UConverterFromUnicodeArgs * args,
UErrorCode * err);
U_CFUNC void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
UErrorCode * err);
U_CFUNC UChar32 T_UConverter_getNextUChar_ISO_2022 (UConverterToUnicodeArgs * args,
UErrorCode * err);
U_CFUNC void T_UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* args,
UErrorCode* err);
U_CFUNC void UConverter_fromUnicode_ISO_2022_KR(UConverterFromUnicodeArgs* args,
UErrorCode* err);
U_CFUNC void UConverter_toUnicode_ISO_2022_KR(UConverterToUnicodeArgs* args, UErrorCode* err);
U_CFUNC void UConverter_toUnicode_ISO_2022_KR(UConverterToUnicodeArgs* args,
UErrorCode* err);
U_CFUNC void UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args,
UErrorCode* err);
U_CFUNC void T_UConverter_fromUnicode_EBCDIC_STATEFUL(UConverterFromUnicodeArgs * args,
UErrorCode * err);
U_CFUNC void T_UConverter_fromUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
UErrorCode * err);
U_CFUNC void T_UConverter_toUnicode_EBCDIC_STATEFUL(UConverterToUnicodeArgs * args,
UErrorCode * err);
U_CFUNC void T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
UErrorCode * err);
U_CFUNC UChar32 T_UConverter_getNextUChar_EBCDIC_STATEFUL (UConverterToUnicodeArgs * args,
UErrorCode * err);
#define ESC_2022 0x1B /*ESC*/
@ -181,45 +218,47 @@ int8_t normalize_esq_chars_2022[256] = {
int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
/* 0 1 2 3 4 5 6 7 8 9 */
1 ,34 ,36 ,39 ,55 ,57,1093 ,1096 ,1097 ,1098 ,1099
,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106 ,1109 ,1154 ,1157
,1160 ,1161 ,1176 ,1254 ,1257 ,1768 ,1773 ,35105 ,36933 ,36936
,36937 ,36938 ,36939 ,36940 ,36942 ,36943 ,36944 ,36945 ,36946 ,36947
,36948 ,37642 ,40133 ,40136 ,40138 ,40139 ,40140 ,40141 ,1123363 ,35947624
,35947625 ,35947626 ,35947627 ,35947629 ,35947630 ,35947631 ,35947635 ,35947636 ,35947638};
1 ,34 ,36 ,39 ,55 ,57 ,1093 ,1096 ,1097 ,1098
,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106 ,1109 ,1154
,1157 ,1160 ,1161 ,1176 ,1254 ,1257 ,1768 ,1773 ,35105 ,36933
,36936 ,36937 ,36938 ,36939 ,36940 ,36942 ,36943 ,36944 ,36945 ,36946
,36947 ,36948 ,37642 ,40133 ,40136 ,40138 ,40139 ,40140 ,40141 ,1123363
,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630 ,35947631 ,35947635 ,35947636 ,35947638};
const char* escSeqStateTable_Result_2022[MAX_STATES_2022] = {
/* 0 1 2 3 4 5 6 7 8 9 */
/* 0 1 2 3 4 5 6 7 8 9 */
NULL ,NULL ,NULL ,NULL ,NULL ,NULL,"latin1" ,"latin1" ,"latin1" ,"ibm-865" ,"ibm-865"
,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-895" ,"JISX-201" ,"latin1" ,"latin1" ,NULL ,"ibm-955"
,"GB2312" ,"JISX-208" ,NULL ,NULL ,"UTF8" ,"ISO-8859-1" ,"ISO-8859-7" ,NULL ,"ibm-955" ,"bm-367"
,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383" ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964"
,"ibm-964" ,"ibm-949" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1"
,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"};
NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1" ,"latin1" ,"ibm-865"
,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-895" ,"JISX-201" ,"latin1" ,"latin1" ,NULL
,"ibm-955" ,"GB2312" ,"JISX-208" ,NULL ,NULL ,"UTF8" ,"ISO-8859-1" ,"ISO-8859-7" ,NULL ,"ibm-955"
,"bm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383" ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964"
,"ibm-964" ,"ibm-964" ,"ibm-949" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL
,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"};
UCNV_TableStates_2022 escSeqStateTable_Value_2022[MAX_STATES_2022] = {
/* 0 1 2 3 4 5 6 7 8 9 */
VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_SS2_SEQUENCE,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_SS2_SEQUENCE ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022};
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022};
/*for 2022 looks ahead in the stream
*to determine the longest possible convertible
*data stream*/
*data stream
*/
static const char* getEndOfBuffer_2022(const char* source,
const char* sourceLimit,
UBool flush);
/*runs through a state machine to determine the escape sequence - codepage correspondence
*changes the pointer pointed to be _this->extraInfo*/
*changes the pointer pointed to be _this->extraInfo
*/
static void changeState_2022(UConverter* _this,
const char** source,
const char* sourceLimit,
@ -229,6 +268,14 @@ static void changeState_2022(UConverter* _this,
UCNV_TableStates_2022 getKey_2022(char source,
int32_t* key,
int32_t* offset);
/* Gets the targetUniChar value (the codepage bytes packed into one integer,
* first byte most significant) for a single UTF-16 code unit by reading the
* from-Unicode tables of an MBCS converter directly.
*
* cnv        - MBCS converter whose shared from-Unicode tables and
*              useFallback flag are consulted
* err        - set to U_INVALID_CHAR_FOUND when the code unit has no mapping
* sourceChar - pointer to the code unit to look up
* value      - receives the packed codepage bytes
* reason     - set to UCNV_UNASSIGNED when the code unit has no mapping
* length     - receives the number of codepage bytes held in *value
*/
void getUniCharFromUnicodeMBCS(UConverter* cnv, UErrorCode* err, UChar* sourceChar,
UChar32 *value ,UConverterCallbackReason *reason,
int32_t* length);
static void
_ISO2022Open(UConverter *cnv, const char *name, const char *locale, UErrorCode *errorCode) {
@ -318,7 +365,7 @@ _ISO2022Close(UConverter *converter) {
uprv_free (converter->extraInfo);
}
}
/*** ??? why are we going to UTF-8??*/
static void
_ISO2022Reset(UConverter *converter) {
if(!((UConverterDataISO2022 *) (converter->extraInfo))->currentLocale){
@ -367,6 +414,9 @@ U_CFUNC void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverterFromUnico
const char *locale =myConverterData->currentLocale;
if(locale && uprv_stricmp(locale,"jp")==0){
T_UConverter_fromUnicode_ISO_2022_JP(args,err);
}
else if(locale && uprv_stricmp(locale,"kr")==0){
UConverter_fromUnicode_ISO_2022_KR(args,err);
}
else{
T_UConverter_fromUnicode_UTF8_OFFSETS_LOGIC(args, err);
@ -385,15 +435,15 @@ U_CFUNC void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverterFromUnico
/*************************** BEGIN ****************************/
/************************************** IMPORTANT **************************************************
* The T_fromUnicode_ISO2022_JP converter doesnot use ucnv_fromUnicode() functions for SBCS and DBCS,
* instead the values are obtained directly by accessing the sharedData structs through ucmp8_getU()
* and ucmp16_getU() macros to increase speed, reduce the overhead of function call and make it
* The T_fromUnicode_ISO2022_JP converter does not use the ucnv_fromUnicode() functions for SBCS, DBCS and
* MBCS; instead the values are obtained directly by accessing the sharedData structs through the ucmp8_getU()
* and ucmp16_getU() macros, and for MBCS by emulating Markus's code, to increase speed, reduce the overhead of function calls and make it
* efficient.The converter iterates over each Unicode codepoint to obtain the equivalent codepoints
* from the codepages supported. Since the source buffer is processed one char at a time it would
* make sense to reduce the extra processing a canned converter would do as far as possible.
*
* If the implementation of these macros or structure of sharedData struct change in the future, make
* sure that ISO-2022-JP is also changed.
* sure that ISO-2022 is also changed.
***************************************************************************************************
*/
@ -490,18 +540,18 @@ U_CFUNC void T_UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* arg
unsigned char *myTarget = (unsigned char *) args->target;
char *uBuf =(char*) malloc(sizeof(char) * 4);
char *targetChar;
char *targetLimit;
/*char *targetChar;
char *targetLimit; */
const UChar *saveSource;
char *saveTarget;
int32_t *saveOffsets ;
const UChar* mySourceLimit;
/* const UChar* mySourceLimit;*/
int32_t myTargetLength = args->targetLimit - args->target;
int32_t mySourceLength = args->sourceLimit - args->source;
int32_t mySourceIndex = 0;
int32_t myTargetIndex = 0;
int32_t length =0;
CompactShortArray *myFromUnicodeDBCS = NULL;
CompactShortArray *myFromUnicodeDBCSFallback = NULL;
CompactByteArray *myFromUnicodeSBCS = NULL;
@ -657,43 +707,21 @@ U_CFUNC void T_UConverter_fromUnicode_ISO_2022_JP(UConverterFromUnicodeArgs* arg
case MBCS:
sourceCharPtr =&mySourceChar;
mySourceLimit= sourceCharPtr+1;
targetChar =uBuf;
targetLimit = uBuf+4;
ucnv_fromUnicode(myConverterData->fromUnicodeConverter,
&targetChar,targetLimit,
&sourceCharPtr,
mySourceLimit,args->offsets,args->flush,err);
if(U_FAILURE(*err)){
getUniCharFromUnicodeMBCS(myConverterData->fromUnicodeConverter,err,
&mySourceChar,
&targetUniChar,&reason, &length);
if(U_FAILURE(*err)){
targetUniChar = missingCharMarker;
*err =U_ZERO_ERROR;
}else{
/*convert to targetUniChar*/
uint8_t len=(uint8_t)(targetChar-uBuf);
targetChar=uBuf;
targetUniChar=0;
/* the below switch structure is not required since IS2022-JP-2
* supports only DBCS char sets. Reverse engineered from DBCS code.
* the cases fall through without break
*/
switch(len){
case 4:
targetUniChar+=(UChar32)((uint8_t)(*targetChar++))<<24;
case 3:
targetUniChar+=(UChar32)((uint8_t)(*targetChar++))<<16;
case 2:
targetUniChar+=(UChar32)((uint8_t)(*targetChar++))<<8;
case 1:
targetUniChar+=(uint8_t)(*targetChar);
default:
break;
}
}
}
/* only DBCS or SBCS characters are expected*/
else if(length > 2){
reason =UCNV_ILLEGAL;
*err =U_INVALID_CHAR_FOUND;
goto CALLBACK;
}
break;
case LATIN1:
@ -930,11 +958,11 @@ UCNV_TableStates_2022 getKey_2022(char c,
int32_t oldmid=0;
if (*key == 0){
togo = (int8_t)normalize_esq_chars_2022[c];
togo = (int8_t)normalize_esq_chars_2022[(int)c];
}
else{
togo <<= 5;
togo += (int8_t)normalize_esq_chars_2022[c];
togo += (int8_t)normalize_esq_chars_2022[(int)c];
}
while (hi != low) /*binary search*/{
@ -1049,7 +1077,15 @@ void changeState_2022(UConverter* _this,
goto DONE;
};
break;
/* Not expected. Added to make the gcc happy */
case VALID_SS2_SEQUENCE:
{
(*source)++;
key = 0;
goto DONE;
}
case VALID_NON_TERMINAL_2022:
case VALID_MAYBE_TERMINAL_2022:
@ -1370,6 +1406,132 @@ U_CFUNC void T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeAr
* ii) There are only 2 shifting sequences SO to shift into double byte mode
* and SI to shift into single byte mode
*/
const char* getEndOfBuffer_2022_KR(UConverterToUnicodeArgs* args, UErrorCode* err);
/*
 * Looks up the codepage bytes for one UTF-16 code unit by reading an MBCS
 * converter's from-Unicode tables directly, instead of going through
 * ucnv_fromUnicode().
 *
 * cnv        - converter whose sharedData->table->mbcs tables (fromUnicodeTable,
 *              fromUnicodeBytes, outputType) and useFallback flag are read.
 * err        - set to U_INVALID_CHAR_FOUND when the code unit is unassigned;
 *              not written on success and not examined on entry.
 * sourceChar - points to the code unit to convert; only *sourceChar is read.
 * value      - receives the codepage bytes packed into one integer with the
 *              first byte in the most significant used position.
 * reason     - set to UCNV_UNASSIGNED on failure; not written on success.
 * length     - receives the number of bytes represented by *value
 *              (0 if outputType is not one of the handled kinds).
 *
 * When the code unit is rejected before the byte lookup (assigned bit clear
 * and fallbacks disabled), *value and *length are left untouched.
 *
 * The bytes are always assembled one at a time into an unsigned 32-bit local:
 * this avoids loading through a cast uint16_t or uint32_t pointer (alignment
 * and aliasing hazards on a byte buffer) and keeps the length classification
 * correct whether or not UChar32 is a signed type.
 */
void getUniCharFromUnicodeMBCS(UConverter* cnv, UErrorCode* err, UChar* sourceChar, UChar32 *value ,UConverterCallbackReason *reason, int32_t* length){
    const UChar c = *sourceChar;
    const uint16_t *table = cnv->sharedData->table->mbcs.fromUnicodeTable;
    const uint8_t *bytes = cnv->sharedData->table->mbcs.fromUnicodeBytes;
    const uint8_t outputType = cnv->sharedData->table->mbcs.outputType;
    /*
     * Index of the two-entry record for the block of 16 code points that
     * contains c: table[i] carries one "assigned" bit per code point and
     * table[i+1] selects the row in the bytes array.
     */
    const uint32_t i = 0x440+2*((uint32_t)table[c>>10]+((c>>4)&0x3f));
    const int isAssigned = (table[i]&(1<<(c&0xf)))!=0;
    const uint8_t *p;
    uint32_t entry;
    uint32_t v;
    int32_t len;

    /* is this code point assigned, or do we use fallbacks? */
    if(!isAssigned && !cnv->useFallback) {
        /* callback(unassigned) */
        *reason = UCNV_UNASSIGNED;
        *err = U_INVALID_CHAR_FOUND;
        return;
    }

    /* position of c's entry, counted in entries (not bytes) */
    entry = 16*(uint32_t)table[i+1]+(c&0xf);

    /* get the bytes and the length for the output */
    switch(outputType) {
    case MBCS_OUTPUT_1:
        p = bytes+entry;
        v = p[0];
        len = 1;
        break;
    case MBCS_OUTPUT_2:
        p = bytes+entry*2;
        v = ((uint32_t)p[0]<<8)|p[1];
        len = (v<=0xff) ? 1 : 2;
        break;
    case MBCS_OUTPUT_3:
        p = bytes+entry*3;
        v = ((uint32_t)p[0]<<16)|((uint32_t)p[1]<<8)|p[2];
        if(v<=0xff) {
            len = 1;
        } else if(v<=0xffff) {
            len = 2;
        } else {
            len = 3;
        }
        break;
    case MBCS_OUTPUT_4:
        p = bytes+entry*4;
        v = ((uint32_t)p[0]<<24)|((uint32_t)p[1]<<16)|((uint32_t)p[2]<<8)|p[3];
        if(v<=0xff) {
            len = 1;
        } else if(v<=0xffff) {
            len = 2;
        } else if(v<=0xffffff) {
            len = 3;
        } else {
            len = 4;
        }
        break;
    case MBCS_OUTPUT_3_EUC:
        p = bytes+entry*2;
        v = ((uint32_t)p[0]<<8)|p[1];
        /* EUC 16-bit fixed-length representation */
        if(v<=0xff) {
            len = 1;
        } else if((v&0x8000)==0) {
            v |= 0x8e8000;
            len = 3;
        } else if((v&0x80)==0) {
            v |= 0x8f0080;
            len = 3;
        } else {
            len = 2;
        }
        break;
    case MBCS_OUTPUT_4_EUC:
        p = bytes+entry*3;
        v = ((uint32_t)p[0]<<16)|((uint32_t)p[1]<<8)|p[2];
        /* EUC 16-bit fixed-length representation applied to the first two bytes */
        if(v<=0xff) {
            len = 1;
        } else if(v<=0xffff) {
            len = 2;
        } else if((v&0x800000)==0) {
            v |= 0x8e800000;
            len = 4;
        } else if((v&0x8000)==0) {
            v |= 0x8f008000;
            len = 4;
        } else {
            len = 3;
        }
        break;
    default:
        /* must not occur */
        v = 0;
        len = 0;
        break;
    }

    /* publish the result before the final check, as callers may read it even on failure */
    *value = (UChar32)v;
    *length = len;

    /*
     * Is the codepage value really an "unassigned" indicator?
     * A zero result is accepted only for U+0000 itself or when the
     * "assigned" bit is set; a fallback lookup that yields zero is unassigned.
     */
    if(v==0 && c!=0 && !isAssigned) {
        *reason = UCNV_UNASSIGNED;
        *err = U_INVALID_CHAR_FOUND;
        return;
    }
}
U_CFUNC void UConverter_fromUnicode_ISO_2022_KR(UConverterFromUnicodeArgs* args, UErrorCode* err){
@ -1379,20 +1541,16 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_KR(UConverterFromUnicodeArgs* args,
int32_t myTargetIndex = 0;
int32_t targetLength = args->targetLimit - args->target;
int32_t sourceLength = args->sourceLimit - args->source;
UChar* mySourceLimit;
CompactShortArray *myFromUnicode = NULL;
int32_t length=0;
UChar32 targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
UChar mySourceChar = 0x0000,c=0x0000;
UBool isTargetUCharDBCS = (UBool)args->converter->fromUnicodeStatus;
UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)args->converter->extraInfo;
UConverterCallbackReason reason;
char *uBuf =(char*) malloc(sizeof(char) * 4);
char *targetChar;
char *targetLimit;
uint8_t len;
UChar *sourceCharPtr=NULL;
UConverter* cnv =myConverterData->fromUnicodeConverter;
isTargetUCharDBCS = (UBool) args->converter->fromUnicodeStatus;
/*writing the char to the output stream */
@ -1402,7 +1560,7 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_KR(UConverterFromUnicodeArgs* args,
if (myTargetIndex < targetLength){
mySourceChar = (UChar) args->source[mySourceIndex++];
c=mySourceChar = (UChar) args->source[mySourceIndex++];
/*Handle surrogates */
if(UTF_IS_LEAD(mySourceChar)){
@ -1455,40 +1613,19 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_KR(UConverterFromUnicodeArgs* args,
}
}
}
getUniCharFromUnicodeMBCS(cnv,err, &mySourceChar, &targetUniChar,&reason, &length);
sourceCharPtr =&mySourceChar;
mySourceLimit= sourceCharPtr+1;
targetChar =uBuf;
targetLimit = uBuf+4;
ucnv_fromUnicode(myConverterData->fromUnicodeConverter,
&targetChar,targetLimit,
&sourceCharPtr,
mySourceLimit,args->offsets,args->flush,err);
if(U_SUCCESS(*err)){
len=(uint8_t)(targetChar-uBuf);
targetChar=uBuf;
targetUniChar=0;
switch(len){
case 4:
targetUniChar+=(UChar32)((uint8_t)(*targetChar++))<<24;
case 3:
targetUniChar+=(UChar32)((uint8_t)(*targetChar++))<<16;
case 2:
targetUniChar+=(UChar32)((uint8_t)(*targetChar++))<<8;
case 1:
targetUniChar+=(uint8_t)(*targetChar);
default:
break;
}
if(*err>0){
goto CALLBACK;
}
else{
targetUniChar=missingCharMarker;
/* only DBCS or SBCS characters are expected*/
if(length > 2){
reason =UCNV_ILLEGAL;
*err =U_INVALID_CHAR_FOUND;
goto CALLBACK;
}
oldIsTargetUCharDBCS = isTargetUCharDBCS;
isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
@ -1590,8 +1727,6 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnic
int32_t myTargetIndex = 0;
int32_t targetLength = args->targetLimit - args->target;
int32_t sourceLength = args->sourceLimit - args->source;
UChar* mySourceLimit;
CompactShortArray *myFromUnicode = NULL;
UChar32 targetUniChar = 0x0000;
UChar mySourceChar = 0x0000;
UBool isTargetUCharDBCS = (UBool)args->converter->fromUnicodeStatus;
@ -1600,10 +1735,9 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnic
UConverterCallbackReason reason;
char *uBuf =(char*) malloc(sizeof(char) * 4);
char *targetChar;
char *targetLimit;
uint8_t len;
int32_t length =0;
UChar *sourceCharPtr=NULL;
UConverter* cnv =myConverterData->fromUnicodeConverter;
isTargetUCharDBCS = (UBool) args->converter->fromUnicodeStatus;
/*writing the char to the output stream */
@ -1668,37 +1802,17 @@ U_CFUNC void UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnic
}
sourceCharPtr =&mySourceChar;
mySourceLimit= sourceCharPtr+1;
targetChar =uBuf;
targetLimit = uBuf+4;
ucnv_fromUnicode(myConverterData->fromUnicodeConverter,
&targetChar,targetLimit,
&sourceCharPtr,
mySourceLimit,args->offsets,args->flush,err);
if(U_SUCCESS(*err)){
len=(uint8_t)(targetChar-uBuf);
targetChar=uBuf;
targetUniChar=0;
switch(len){
case 4:
targetUniChar+=(UChar32)((uint8_t)(*targetChar++))<<24;
case 3:
targetUniChar+=(UChar32)((uint8_t)(*targetChar++))<<16;
case 2:
targetUniChar+=(UChar32)((uint8_t)(*targetChar++))<<8;
case 1:
targetUniChar+=(uint8_t)(*targetChar);
default:
break;
}
getUniCharFromUnicodeMBCS(cnv,err, &mySourceChar, &targetUniChar,&reason, &length);
if(*err>0){
goto CALLBACK;
}
else{
targetUniChar=missingCharMarker;
/* only DBCS or SBCS characters are expected*/
if(length > 2){
reason =UCNV_ILLEGAL;
*err =U_INVALID_CHAR_FOUND;
goto CALLBACK;
}
oldIsTargetUCharDBCS = isTargetUCharDBCS;
isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
@ -1822,8 +1936,6 @@ const char* getEndOfBuffer_2022_KR(UConverterToUnicodeArgs* args, UErrorCode* er
args->flush,
err);
}
/* validateKREscape(args);*/
if(U_FAILURE(*err))
return mySource;
else