ICU-947 ISO-2022 performance improvements

X-SVN-Rev: 4620
2001-05-07 23:54:01 +00:00 · 2001-05-07 23:54:01 +00:00 · 0a875cdc0b
commit 0a875cdc0b
parent 3edc7c1739
3 changed files with 292 additions and 227 deletions
--- a/icu4c/source/common/ucnv2022.c
+++ b/icu4c/source/common/ucnv2022.c
@ -34,14 +34,15 @@
 #include "ucnv_cnv.h"
 #include "unicode/ustring.h"
 #include "unicode/ucnv_cb.h"
+#include "ucnvmbcs.h"
 #include "cstring.h"

-#define CONCAT_ESCAPE_EX(args, target, targetLimit,offsets,strToAppend,len, err){\
+#define CONCAT_ESCAPE_EX(args,source, target, targetLimit,offsets,strToAppend,len, err){\
    while(len-->0){\
        if(target < targetLimit){\
            *(target++) = (unsigned char) *(strToAppend++);\
            if(offsets){\
-                *(offsets++) = source - args->source -1;\
+                *(offsets++) = source -  args->source -1;\
            }\
        }\
        else{\
@ -51,6 +52,72 @@
    }\
 }

+#define  MBCS_FROM_UCHAR32_ISO2022(sharedData,c,  value, useFallback, length, outputType) {\
+    const uint16_t *table=sharedData->table->mbcs.fromUnicodeTable;\
+    uint32_t stage2Entry;\
+    uint32_t myValue;\
+    const uint8_t *p;\
+    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */\
+    if(c<0x10000 || (sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {\
+        stage2Entry=MBCS_STAGE_2_FROM_U(table, c);\
+        /* get the bytes and the length for the output */\
+        if(outputType==MBCS_OUTPUT_2){\
+            myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);\
+            if(myValue<=0xff) {\
+                length=1;\
+            } else {\
+                length=2;\
+            }\
+        }else if(outputType==MBCS_OUTPUT_3){\
+            p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->table->mbcs.fromUnicodeBytes, stage2Entry, c);\
+            myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];\
+            if(myValue<=0xff) {\
+                length=1;\
+            } else if(myValue<=0xffff) {\
+                length=2;\
+            } else {\
+                length=3;\
+            }\
+        }\
+        /* is this code point assigned, or do we use fallbacks? */\
+        if( (stage2Entry&(1<<(16+(c&0xf))))!=0 ||\
+            (FROM_U_USE_FALLBACK(useFallback, c) && (myValue!=0 || c==0))\
+        ) {\
+            /*\
+             * We allow a 0 byte output if the Unicode code point is\
+             * U+0000 and also if the "assigned" bit is set for this entry.\
+             * There is no way with this data structure for fallback output\
+             * for other than U+0000 to be a zero byte.\
+             */\
+            /* assigned */\
+            value=myValue;\
+        } else {\
+            length=0;\
+        }\
+    }else{\
+        length=0;\
+    }\
+}
+#define MBCS_SINGLE_FROM_UCHAR32(sharedData, c,retval, useFallback) { \
+    const uint16_t *table; \
+    int32_t value;\
+    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */\
+    if(c>=0x10000 && !(sharedData->table->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {\
+        value= -1;\
+    }\
+    /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */\
+    table=sharedData->table->mbcs.fromUnicodeTable;\
+    /* get the byte for the output */\
+    value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->table->mbcs.fromUnicodeBytes, c);\
+    /* is this code point assigned, or do we use fallbacks? */\
+    if(useFallback ? value>=0x800 : value>=0xc00) {\
+        value &=0xff;\
+    } else {\
+        value= -1;\
+    }\
+    retval=(uint16_t) value;\
+}
+
 #define UCNV_SS2 "\x1B\x4E"
 #define UCNV_SS3 "\x1B\x4F"
 #define UCNV_SS2_LEN 2
@ -1341,7 +1408,6 @@ static  const int32_t escSeqCharsLen[MAX_VALID_CP_JP] ={
    3  /* length of <ESC>(I  HWKANA_7BIT */
 };

-
 /*
 * The iteration over various code pages works this way:
 * i)   Get the currentState from myConverterData->currentState
@ -1368,11 +1434,14 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    const UChar* source = args->source;
    const UChar* sourceLimit = args->sourceLimit;
    int32_t* offsets = args->offsets;
-    uint32_t targetUniChar = missingCharMarker;
+    int32_t offset = 0;
+    uint32_t targetByteUnit = missingCharMarker;
    UChar32 sourceChar  =0x0000;
    const char* escSeq = NULL;
    int len =0; /*length of escSeq chars*/
    UConverterCallbackReason reason;
+    UConverterSharedData* sharedData=NULL;
+    UBool useFallback = args->converter->useFallback;

    /* state variables*/
    StateEnum* currentState       = &converterData->fromUnicodeCurrentState;
@ -1380,7 +1449,7 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    UConverter** currentConverter = &converterData->fromUnicodeConverter;
    Cnv2022Type* currentType      = &converterData->currentType;
    UConverter** convArray        = converterData->myConverterArray;
-
+    
    /* arguments check*/
    if ((args->converter == NULL) || (targetLimit < target) || (sourceLimit < source)){
        *err = U_ILLEGAL_ARGUMENT_ERROR;
@ -1397,15 +1466,15 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    if(args->converter->fromUSurrogateLead!=0 && target< targetLimit) {
        goto getTrail;
    }
-
+    
+    *currentConverter = convArray[(*currentConverter==NULL) ? 0 : (int)*currentState];
+    sharedData= (*currentConverter)->sharedData;
+    
    while( source < sourceLimit){

-        *currentConverter = convArray[(*currentConverter==NULL) ? 0 : (int)*currentState];
-
-        targetUniChar = missingCharMarker;
+        targetByteUnit = missingCharMarker;

        if(target < targetLimit){
-
            sourceChar  = *(source++);
            if(sourceChar > SPACE) {
                do{
@ -1413,22 +1482,26 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
                    /* most common case*/
                    case DBCS:
                        {
-                            uint32_t value;
-                            if(2 == _MBCSFromUChar32((*currentConverter)->sharedData, 
-                                                     sourceChar, &value, args->converter->useFallback)) {
-                                targetUniChar = (uint16_t)value;
+                            uint32_t value=0;
+                            int length=0;
+                            /*if(2 == _MBCSFromUChar32(sharedData,sourceChar, &value, useFallback)) {
+                                targetByteUnit = (uint16_t)value;
+                            }*/
+                            MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,value,useFallback,length,MBCS_OUTPUT_2);
+                            if(length==2){
+                                targetByteUnit =  value;
                            }
                        }
                        break;
                    case ASCII1:
                        if(sourceChar < 0x7f){
-                            targetUniChar = sourceChar;
+                            targetByteUnit = sourceChar;
                        }
                        break;

                    case SBCS:
-                        targetUniChar = (uint16_t)_MBCSSingleFromUChar32((*currentConverter)->sharedData, 
-                                                                    sourceChar, args->converter->useFallback);
+                       MBCS_SINGLE_FROM_UCHAR32(sharedData,sourceChar,targetByteUnit,useFallback);
+                       // targetByteUnit=(uint16_t)_MBCSSingleFromUChar32(sharedData,sourceChar,useFallback);
                        /*
                         * If mySourceChar is unassigned, then _MBCSSingleFromUChar32() returns -1
                         * which becomes the same as missingCharMarker with the cast to uint16_t.
@ -1437,14 +1510,14 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
                        if(0xFF9F-sourceChar<=(0xFF9F-0xFF61)){
                            if( converterData->version==3){
                                /*we get a1-df from _MBCSSingleFromUChar32 so subtract 0x80*/
-                                targetUniChar-=0x80; 
+                                targetByteUnit-=0x80; 
                                *currentState = HWKANA_7BIT;
                            }
                            else if( converterData->version==4){
                                *currentState = JISX201;
                            }
                            else{
-                                targetUniChar=missingCharMarker;
+                                targetByteUnit=missingCharMarker;
                            }
                            *currentConverter = convArray[(*currentConverter==NULL) ? 0 : (int)*currentState];
                            *currentType = (Cnv2022Type) myConverterType[*currentState];
@ -1453,7 +1526,7 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args

                    case LATIN1:
                        if(sourceChar <= 0x00FF){
-                            targetUniChar = sourceChar;
+                            targetByteUnit = sourceChar;
                        }

                        break;
@ -1461,10 +1534,11 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
                        /*not expected */
                        break;
                    }
-                    if(targetUniChar==missingCharMarker){
+                    if(targetByteUnit==missingCharMarker){
                        *currentState = nextStateArray[converterData->version][*currentState];
                        *currentConverter = convArray[(*currentConverter==NULL) ? 0 : (int)*currentState];
                        *currentType = (Cnv2022Type) myConverterType[*currentState];
+                        sharedData= (*currentConverter)->sharedData;
                   }
                   else
                       /*got the mapping so break from while loop*/
@ -1474,63 +1548,63 @@ UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args

            }
            else{
-                targetUniChar = sourceChar;
+                targetByteUnit = sourceChar;
                *currentState = ASCII;
                *currentType = (Cnv2022Type) myConverterType[*currentState];
            }

-            if(targetUniChar != missingCharMarker){
+            if(targetByteUnit != missingCharMarker){

                if( *currentState != initIterState){

                    escSeq = escSeqChars[(int)*currentState];
                    len = escSeqCharsLen[(int)*currentState];

-                    CONCAT_ESCAPE_EX(args, target, targetLimit, offsets, escSeq,len,err);
+                    CONCAT_ESCAPE_EX(args,source, target,targetLimit, offsets, escSeq,len,err);

                    /* Append SSN for shifting to G2 */
                    if(*currentState==ISO8859_1 || *currentState==ISO8859_7){
                        escSeq = UCNV_SS2;
                        len = UCNV_SS2_LEN;
-                        CONCAT_ESCAPE_EX(args, target, targetLimit,offsets, escSeq,len,err);
+                        CONCAT_ESCAPE_EX(args, source, target, targetLimit,offsets, escSeq,len,err);
                    }
                }
                initIterState = *currentState;
-                /* write the targetUniChar  to target */
-                if(targetUniChar <= 0x00FF){
+                offset = source - args->source -1;
+                /* write the targetByteUnit  to target */
+                if(targetByteUnit <= 0x00FF){
                    if( target <targetLimit){
-                        *(target++) = (unsigned char) targetUniChar;
+                        *(target++) = (unsigned char) targetByteUnit;
                        if(offsets){
-                            *(offsets++) = source - args->source-1;
+                            *(offsets++) = offset;
                        }

                    }else{
-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) targetUniChar;
+                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) targetByteUnit;
                        *err = U_BUFFER_OVERFLOW_ERROR;
                    }
                }else{
                    if(target < targetLimit){
-                        *(target++) =(unsigned char) (targetUniChar>>8);
+                        *(target++) =(unsigned char) (targetByteUnit>>8);
                        if(offsets){
-                              *(offsets++) = source - args->source-1;
+                              *(offsets++) = offset;
                        }
                        if(target < targetLimit){
-                            *(target++) =(unsigned char) (targetUniChar);
+                            *(target++) =(unsigned char) (targetByteUnit);
                            if(offsets){
-                                *(offsets++) = source - args->source-1;
+                                *(offsets++) = offset;
                            }

                        }else{
-                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetUniChar);
+                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
                            *err = U_BUFFER_OVERFLOW_ERROR;
                        }
                    }else{
-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetUniChar>>8);
-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetUniChar);
+                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit>>8);
+                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
                        *err = U_BUFFER_OVERFLOW_ERROR;
                    }
                }
-
            }
            else{

@ -1608,7 +1682,6 @@ getTrail:
    args->target = (char*)target;
 }

-
 /*************** to unicode *******************/

 /****************************************************************************
@ -1715,9 +1788,8 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
            mySourceChar= (unsigned char) *mySource++;
            
            /* Consume the escape sequences and ascertain the state */
-            switch(mySourceChar){
-            case UCNV_SI:
-                 if(myData->version==3 && *toUnicodeStatus==0x00){
+            if(mySourceChar==UCNV_SI){
+                if(myData->version==3 && *toUnicodeStatus==0x00){
                    if(myData->toUnicodeSaveState!=INVALID_STATE){
                        *currentState = (StateEnum) myData->toUnicodeSaveState;
                        continue;
@ -1726,53 +1798,59 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                        *err =U_ILLEGAL_CHAR_FOUND;
                        goto CALLBACK;
                    }
-                
+        
                }
-                else
+                else{
                    goto CALLBACK;
-
-            case UCNV_SO:
+                }
+            }else if(mySourceChar==UCNV_SO){
                if(myData->version==3 && *toUnicodeStatus==0x00){
                    myData->toUnicodeSaveState= (int) *currentState;
                    *currentState = HWKANA_7BIT;
                    continue;
                }
-                else
+                else{
                    goto CALLBACK;
-            default:
-                if(myData->key==0){
-                    if(mySourceChar<=SPACE){
-                        if(*toUnicodeStatus== 0x00){
-                            *currentState = ASCII;
+                }
+            }else if(mySourceChar==ESC_2022 || myData->key!=0){
+                if(*toUnicodeStatus== 0x00){
+                        mySource--;
+                        changeState_2022(args->converter,&(mySource), 
+                            args->sourceLimit, args->flush,ISO_2022_JP,&plane, err);
+                        /*Invalid or illegal escape sequence */
+                        if(U_SUCCESS(*err)){
+                            continue;

                        }
-                        else
-                            goto CALLBACK;
-
-                    }
-                    break;
-                }
-            case ESC_2022:
-                if(*toUnicodeStatus== 0x00){
-                    mySource--;
-                    changeState_2022(args->converter,&(mySource), 
-                        args->sourceLimit, args->flush,ISO_2022_JP,&plane, err);
-                    /*Invalid or illegal escape sequence */
-                    if(U_SUCCESS(*err)){
-                        continue;
-
-                    }
-                    else{
-                        args->target = myTarget;
-                        args->source = mySource;
-                        return;
-                    }
-                }
-                else
+                        else{
+                            args->target = myTarget;
+                            args->source = mySource;
+                            return;
+                        }
+                  }
+                else{
                    goto CALLBACK;
+                }           
            }

            switch(myConverterType[*currentState]){
+            case DBCS:
+                if(*toUnicodeStatus== 0x00){
+                    *toUnicodeStatus= (UChar) mySourceChar;
+                    continue;
+                }
+                else{
+                    const char *pBuf;
+
+                    tempBuf[0] = (char) args->converter->toUnicodeStatus;
+                    tempBuf[1] = (char) mySourceChar;
+                    mySourceChar+= (args->converter->toUnicodeStatus)<<8;
+                    *toUnicodeStatus= 0;
+                    pBuf = tempBuf;
+                    targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData, &pBuf, tempBuf+2, args->converter->useFallback);
+                }
+                break;
+

            case ASCII1:
                if( mySourceChar < 0x7F){
@ -1799,23 +1877,6 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

                break;

-            case DBCS:
-                if(*toUnicodeStatus== 0x00){
-                    *toUnicodeStatus= (UChar) mySourceChar;
-                    continue;
-                }
-                else{
-                    const char *pBuf;
-
-                    tempBuf[0] = (char) args->converter->toUnicodeStatus;
-                    tempBuf[1] = (char) mySourceChar;
-                    mySourceChar+= (args->converter->toUnicodeStatus)<<8;
-                    *toUnicodeStatus= 0;
-                    pBuf = tempBuf;
-                    targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData, &pBuf, tempBuf+2, args->converter->useFallback);
-                }
-                break;
-
            case LATIN1:
                
                targetUniChar = (UChar) mySourceChar;
@ -1921,6 +1982,8 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    UBool oldIsTargetByteDBCS = isTargetByteDBCS;
    UConverterDataISO2022 *converterData=(UConverterDataISO2022*)args->converter->extraInfo;
    UConverterCallbackReason reason;
+    UConverterSharedData* sharedData = converterData->fromUnicodeConverter->sharedData;
+    UBool useFallback = args->converter->useFallback;
    int32_t length =0;

    /*Arguments Check*/
@ -1951,8 +2014,9 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args

        if(target < (unsigned char*) args->targetLimit){
            sourceChar = *source++;
-            length= _MBCSFromUChar32(converterData->fromUnicodeConverter->sharedData,
-                sourceChar,&targetByteUnit,args->converter->useFallback);
+           /* length= _MBCSFromUChar32(converterData->fromUnicodeConverter->sharedData,
+                sourceChar,&targetByteUnit,args->converter->useFallback);*/
+            MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,targetByteUnit,useFallback,length,MBCS_OUTPUT_2);
            /* only DBCS or SBCS characters are expected*/
            /* DB haracters with high bit set to 1 are expected */
            if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){
@ -2147,6 +2211,8 @@ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
    UChar mySourceChar = 0x0000;
    UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);
    int plane =0; /*dummy variable */
+    UConverterSharedData* sharedData = myData->fromUnicodeConverter->sharedData;
+    UBool useFallback = args->converter->useFallback;

    /*Arguments Check*/
    if (U_FAILURE(*err)){ 
@ -2169,44 +2235,31 @@ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

            mySourceChar= (unsigned char) *mySource++;

-            switch(mySourceChar){
-
-                case UCNV_SI:
-                    myData->currentType = SBCS;
-                    /*consume the source */
-                    continue;
-                case UCNV_SO:
-                    myData->currentType =DBCS;
-                    /*consume the source */
-                    continue;
-
-                default:
-                    /* If we are in the process of consuming an escape sequence 
-                     * we fall through execute the the statements of next switch 
-                     * tag else we break;
-                     */
-                    if(myData->key==0){
-                        break;
-                    }
-                case ESC_2022:
-                {
-                    /* Already doing some conversion and found escape Sequence*/
-                    if(args->converter->mode == UCNV_SO){
-                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
-                    }
-                    else{
-                        mySource--;
-                        changeState_2022(args->converter,&(mySource), 
-                                        args->sourceLimit, args->flush,ISO_2022_KR,&plane, err);
-                    }
-                    if(U_FAILURE(*err)){
-                        args->target = myTarget;
-                        args->source = mySource;
-                        return;
-                    }
-                    continue;
+            if(mySourceChar==UCNV_SI){
+                myData->currentType = SBCS;
+                /*consume the source */
+                continue;
+            }else if(mySourceChar==UCNV_SO){
+                myData->currentType = DBCS;
+                /*consume the source */
+                continue;
+            }else if(mySourceChar==ESC_2022 || myData->key!=0){
+                /* Already doing some conversion and found escape Sequence*/
+                if(args->converter->mode == UCNV_SO){
+                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
                }
-            }
+                else{
+                    mySource--;
+                    changeState_2022(args->converter,&(mySource), 
+                                    args->sourceLimit, args->flush,ISO_2022_KR,&plane, err);
+                }
+                if(U_FAILURE(*err)){
+                    args->target = myTarget;
+                    args->source = mySource;
+                    return;
+                }
+                continue;
+            }   

            if(myData->currentType==DBCS){
                if(args->converter->toUnicodeStatus == 0x00){
@ -2218,28 +2271,24 @@ UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                    tempBuf[1] = (char) (mySourceChar+0x80);
                    mySourceChar = (UChar)(mySourceChar + (args->converter->toUnicodeStatus<<8));
                    args->converter->toUnicodeStatus =0x00;
-                    pBuf = &tempBuf[0];
-                    tempLimit = &tempBuf[2]+1;
-                    targetUniChar = _MBCSSimpleGetNextUChar(myData->fromUnicodeConverter->sharedData,
-                        &pBuf,tempLimit,args->converter->useFallback);
+                    pBuf = tempBuf;
+                    targetUniChar = _MBCSSimpleGetNextUChar(sharedData,
+                        &pBuf,(pBuf+2),useFallback);
                }
            }
            else{
                if(args->converter->fromUnicodeStatus == 0x00){
-                    tempBuf[0] = (char) mySourceChar;
-                    pBuf = &tempBuf[0];
-                    tempLimit = &tempBuf[1];
-                    targetUniChar = _MBCSSimpleGetNextUChar(myData->currentConverter->sharedData,
-                        &pBuf,tempLimit,args->converter->useFallback);
+                    targetUniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(sharedData, mySourceChar);
+
                }

            }
-            if(targetUniChar < 0xfffe){
+            if(targetUniChar != missingCharMarker){
                if(args->offsets)
                    args->offsets[myTarget - args->target]= mySource - args->source - 1-(myData->currentType==DBCS);
                *(myTarget++)=(UChar)targetUniChar;
            }
-            else if(targetUniChar>=0xfffe){
+            else {
                
                /* Call the callback function*/
                toUnicodeCallback(args,mySourceChar,&mySource,targetUniChar,&myTarget,err);
@ -2411,8 +2460,8 @@ typedef enum  {

 static Cnv2022Type myConverterTypeCN[4]={
        ASCII1,
-        MBCS,
-        MBCS,
+        DBCS,
+        DBCS,
        MBCS
 };

@ -2430,13 +2479,16 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    const UChar* source = args->source;
    const UChar* sourceLimit = args->sourceLimit;
    int32_t* offsets = args->offsets;
-    uint32_t targetUniChar = missingCharMarker;
+    int32_t offset =0;
+    uint32_t targetByteUnit = missingCharMarker;
    uint32_t sourceChar  =0x0000;
    const char* escSeq = NULL;
    int len =0; /*length of escSeq chars*/
    uint32_t targetValue=0;
    uint8_t planeVal=0;
    UConverterCallbackReason reason;
+    UConverterSharedData* sharedData=NULL;
+    UBool useFallback = args->converter->useFallback;

    /* state variables*/
    StateEnumCN* currentState     = (StateEnumCN*)&converterData->fromUnicodeCurrentState;
@ -2456,18 +2508,18 @@ UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
    if(U_FAILURE(*err)){
        return;
    }
-
+    /* set up the state */    
    initIterState = *currentState;
+    *currentConverter =converterData->myConverterArray[(*currentConverter==NULL) ? 0 : (int)*currentState];
+    sharedData=(*currentConverter)->sharedData;

    /* check if the last codepoint of previous buffer was a lead surrogate*/
    if(args->converter->fromUSurrogateLead!=0 && target< targetLimit) {
        goto getTrail;
    }
-
    while( source < sourceLimit){

-        *currentConverter =converterData->myConverterArray[(*currentConverter==NULL) ? 0 : (int)*currentState];
-        targetUniChar =missingCharMarker;
+        targetByteUnit =missingCharMarker;
        lPlane =0;

        if(target < targetLimit){
@ -2484,6 +2536,7 @@ getTrail:
                        UChar trail=(UChar) *source;
                        if(UTF_IS_SECOND_SURROGATE(trail)) {
                            source++;
+                            /*(((args->converter->fromUSurrogateLead)<<10L)+(trail)-((0xd800<<10L)+0xdc00-0x10000))*/
                            sourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail);
                            args->converter->fromUSurrogateLead=0x00;
                            /* convert this surrogate code point */
@ -2510,7 +2563,7 @@ getTrail:

            /* do the conversion */
            if(sourceChar < 0x007f ){
-                targetUniChar = sourceChar;
+                targetByteUnit = sourceChar;
                if(*currentState!= ASCII_1){
                    *currentState = ASCII_1;
                    *isEscapeAppended = FALSE;
@ -2521,57 +2574,53 @@ getTrail:

                do{
                    if(myConverterTypeCN[*currentState] == MBCS){
-                        len= _MBCSFromUChar32((*currentConverter)->sharedData,sourceChar,
-                                                    &targetValue,args->converter->useFallback);
-                        switch(len){
-                        case 0:
-                            targetUniChar = missingCharMarker;
-                            break;
-                        case 2:
-                            if(( converterData->version) == 0 && *currentState ==ISO_IR_165){
-                                targetUniChar = missingCharMarker;
-                            }else{
-                                targetUniChar = (UChar32) targetValue;
-                            }
-                            break;
-                        case 3:
-                            targetUniChar = (UChar32) targetValue;
+                        /*len= _MBCSFromUChar32((*currentConverter)->sharedData,sourceChar,
+                                                    &targetValue,args->converter->useFallback);*/
+                        MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,targetValue,useFallback,len,MBCS_OUTPUT_3);
+                         if(len==3){
+                            targetByteUnit = (UChar32) targetValue;
                            planeVal = (uint8_t) ((targetValue)>>16);
                            if(planeVal >0x80 && planeVal<0x89){
                                lPlane = (int)(planeVal - 0x80);
-                                targetUniChar -= (planeVal<<16);
+                                targetByteUnit -= (planeVal<<16);
                            }else {
                                lPlane =-1;
+                                targetByteUnit=missingCharMarker;
                            }
                            if(converterData->version == 0 && lPlane >2){
-                                targetUniChar = missingCharMarker;
+                                targetByteUnit = missingCharMarker;
                            }
-                            break;
-                        default:
-                            reason =UCNV_ILLEGAL;
-                            *err =U_INVALID_CHAR_FOUND;
-                            break;
                        }
+                    }else if(myConverterTypeCN[*currentState] == DBCS){
+                        MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,targetValue,useFallback,len,MBCS_OUTPUT_2);
+                        if(len==2){    
+                            if(( converterData->version) == 0 && *currentState ==ISO_IR_165){
+                                targetByteUnit = missingCharMarker;
+                            }else{
+                                targetByteUnit = (UChar32) targetValue;
+                            }
+                        }
+                     
                    }else{
                        if(sourceChar < 0x7f){
-                             targetUniChar = sourceChar;
+                             targetByteUnit = sourceChar;
                        }
                    }
-                    if(targetUniChar==missingCharMarker){
+                    if(targetByteUnit==missingCharMarker){

                        *currentState=(StateEnumCN)((*currentState<3)? *currentState+1:0);
                        *currentConverter =converterData->myConverterArray[(*currentConverter==NULL) ? 0 : (int)*currentState];
-                        targetUniChar =missingCharMarker;
+                        targetByteUnit =missingCharMarker;
                        *isEscapeAppended = FALSE;
                        *isShiftAppended = FALSE;
-
+                        sharedData=(*currentConverter)->sharedData;
                    }
                    else
                        break;
                }while(initIterState != *currentState);

            }
-            if(targetUniChar != missingCharMarker){
+            if(targetByteUnit != missingCharMarker){

                args->converter->fromUnicodeStatus=(UBool) (*currentState > ASCII_1);
                /* Append the escpace sequence */
@ -2580,64 +2629,57 @@ getTrail:
                    temp =(*currentState==CNS_11643) ? ((int)*currentState+lPlane-1):(int)*currentState ;
                    escSeq = escSeqCharsCN[temp];
                    len =escSeqCharsLenCN[temp];
-                    CONCAT_ESCAPE_EX(args, target, targetLimit, offsets, escSeq,len,err);
+                    CONCAT_ESCAPE_EX(args,source, target, targetLimit, offsets, escSeq,len,err);
                    *plane=lPlane;
                    *isEscapeAppended=TRUE;
                }

                /* Append Shift Sequences */
-                switch(*currentState){
-                case ASCII1:
-                    break;
-                case GB2312_1:
-                    /*falls through */
-                case ISO_IR_165:
-                         if(!*isShiftAppended){
-                            len =shiftSeqCharsLenCN[*currentState];
-                            escSeq = shiftSeqCharsCN[*currentState];
-                            CONCAT_ESCAPE_EX(args, target, targetLimit, offsets, escSeq,len,err);
-                            *isShiftAppended=TRUE;
-                         }
-                         break;
-                default:
-                        len =strlen(shiftSeqCharsCN[*currentState+*plane]);
-                        escSeq = shiftSeqCharsCN[*currentState+*plane];
-                        CONCAT_ESCAPE_EX(args, target, targetLimit, offsets, escSeq,len,err);
-                        break;
+                if(*currentState == GB2312_1 || *currentState==ISO_IR_165){
+                    if(!*isShiftAppended){
+                        len =shiftSeqCharsLenCN[*currentState];
+                        escSeq = shiftSeqCharsCN[*currentState];
+                        CONCAT_ESCAPE_EX(args,source, target, targetLimit, offsets, escSeq,len,err);
+                        *isShiftAppended=TRUE;
+                    }
+                }else if(*currentState!=ASCII1){
+                    len =shiftSeqCharsLenCN[*currentState+*plane];
+                    escSeq = shiftSeqCharsCN[*currentState+*plane];
+                    CONCAT_ESCAPE_EX(args,source, target, targetLimit, offsets, escSeq,len,err);
                }

                initIterState = *currentState;

-                /* write the targetUniChar  to target */
-                if(targetUniChar <= 0x00FF){
+                /* write the targetByteUnit  to target */
+                if(targetByteUnit <= 0x00FF){
                    if( target <targetLimit){
-                        *(target++) = (unsigned char) targetUniChar;
+                        *(target++) = (unsigned char) targetByteUnit;
                        if(offsets){
-                            *(offsets++) = source - args->source-1;
+                            *(offsets++) =source-args->source-1;
                        }

                    }else{
-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) targetUniChar;
+                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) targetByteUnit;
                        *err = U_BUFFER_OVERFLOW_ERROR;
                    }
                }else{
                    if(target < targetLimit){
-                        *(target++) =(unsigned char) (targetUniChar>>8);
+                        *(target++) =(unsigned char) (targetByteUnit>>8);
                        if(offsets){
-                            *(offsets++) = source - args->source-1;
+                            *(offsets++) = source-args->source-1;
                        }
                        if(target < targetLimit){
-                            *(target++) =(unsigned char) (targetUniChar);
+                            *(target++) =(unsigned char) (targetByteUnit);
                            if(offsets){
-                                *(offsets++) = source - args->source-1;
+                                *(offsets++) = source-args->source-1;
                            }
                        }else{
-                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetUniChar);
+                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
                            *err = U_BUFFER_OVERFLOW_ERROR;
                        }
                    }else{
-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetUniChar>>8);
-                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetUniChar);
+                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit>>8);
+                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);
                        *err = U_BUFFER_OVERFLOW_ERROR;
                    }
                }
@ -2672,9 +2714,7 @@ callback:
     *flush is TRUE, we can deduce that the input stream is truncated
     */
    if (args->converter->fromUSurrogateLead !=0 && (source == sourceLimit) && args->flush){
-
        *err = U_TRUNCATED_CHAR_FOUND;
-
    }
    /* Reset the state of converter if we consumed 
     * the source and flush is true
@ -2941,6 +2981,7 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

            mySourceChar= (unsigned char) *mySource++;

+            
            switch(mySourceChar){
            case UCNV_SI:
                if(args->converter->toUnicodeStatus != 0x00){
@ -3013,13 +3054,10 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                myData->plane=plane;
                if(plane>0){
                    myData->currentType = MBCS;
+                }else{
+                    myData->currentType=DBCS;
                }
-                else if(myData->currentConverter &&  
-                            uprv_stricmp("latin_1", 
-                            myData->currentConverter->sharedData->staticData->name)==0){

-                    myData->currentType=ASCII1;
-                }
                /* invalid or illegal escape sequence */
                if(U_FAILURE(*err)){
                    args->target = myTarget;
@ -3031,12 +3069,32 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
            }
            if(targetUniChar < 0xfffe){
                if(args->offsets){
-
                   args->offsets[myTarget - args->target]= mySource - args->source - 2 
                                                            +(myData->currentType==ASCII);
                }
                *(myTarget++)=(UChar)targetUniChar;
            }
+            else if(targetUniChar > 0xfffe){
+
+                /* disassemble the surrogate pair and write to output*/
+                if(args->offsets){
+                    args->offsets[myTarget - args->target]= mySource - args->source - 2 
+                                                            +(myData->currentType==ASCII);
+                }
+                targetUniChar-=0x0010000;
+                *(myTarget++) =(UChar)(0xd800+(UChar)(targetUniChar>>10));
+                if(myTarget< args->targetLimit){         
+                    if(args->offsets){
+                        args->offsets[myTarget - args->target]= mySource - args->source - 2 
+                                                            +(myData->currentType==ASCII);
+                    }
+                    *(myTarget)++ = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
+                }else{
+                    args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=
+                                    (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));
+                }
+
+            }
            else{
                /* Call the callback function*/
                toUnicodeCallback(args,mySourceChar,&mySource,targetUniChar,&myTarget,err);
--- a/icu4c/source/common/ucnvmbcs.c
+++ b/icu4c/source/common/ucnvmbcs.c
@ -28,6 +28,12 @@
 *   - byte sequences must not have leading zero bytes
 *   - except for SBCS codepages: no fallback mapping from Unicode to a zero byte
 *   - limitation to up to 4 bytes per character
+*
+*   Change history: 
+*
+*    5/6/2001       Ram       Moved  MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U,
+*                             MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2
+*                             macros to ucnvmbcs.h file
 */

 #include "unicode/utypes.h"
@ -269,17 +275,6 @@
 * adding new ones without crashing an unaware converter
 */

-/* single-byte fromUnicode: get the 16-bit result word */
-#define MBCS_SINGLE_RESULT_FROM_U(table, results, c) (results)[ (table)[ (table)[(c)>>10] +(((c)>>4)&0x3f) ] +((c)&0xf) ]
-
-/* multi-byte fromUnicode: get the 32-bit stage 2 entry */
-#define MBCS_STAGE_2_FROM_U(table, c) ((uint32_t *)(table))[ (table)[(c)>>10] +(((c)>>4)&0x3f) ]
-
-#define MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c) ((uint16_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)]
-#define MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c) ((uint32_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)]
-
-#define MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c) ((bytes)+(16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf))*3)
-
 /* prototypes --------------------------------------------------------------- */

 U_CFUNC void
--- a/icu4c/source/common/ucnvmbcs.h
+++ b/icu4c/source/common/ucnvmbcs.h
@ -70,6 +70,18 @@ enum {
 #define MBCS_ENTRY_FINAL_VALUE(entry) ((entry)&0xfffff)
 #define MBCS_ENTRY_FINAL_VALUE_16(entry) (uint16_t)(entry)

+/* single-byte fromUnicode: get the 16-bit result word */
+#define MBCS_SINGLE_RESULT_FROM_U(table, results, c) (results)[ (table)[ (table)[(c)>>10] +(((c)>>4)&0x3f) ] +((c)&0xf) ]
+
+/* multi-byte fromUnicode: get the 32-bit stage 2 entry */
+#define MBCS_STAGE_2_FROM_U(table, c) ((uint32_t *)(table))[ (table)[(c)>>10] +(((c)>>4)&0x3f) ]
+
+#define MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c) ((uint16_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)]
+#define MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c) ((uint32_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)]
+
+#define MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c) ((bytes)+(16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf))*3)
+
+
 /**
 * MBCS output types for conversions from Unicode.
 * These per-converter types determine the storage method in stage 3 of the lookup table,