ICU-6583 Port over illegal sequence handling code from ticket #5691 to ICU4J. Fix minor bugs in various callback functions and error handling code in ICU4J. Reenable "full" data driven conversion test.

X-SVN-Rev: 25468
2009-02-23 21:38:21 +00:00 · 2009-02-23 21:38:21 +00:00 · 27ce5a3df5
commit 27ce5a3df5
parent e729683a89
7 changed files with 417 additions and 562 deletions
--- a/icu4j/src/com/ibm/icu/charset/CharsetCallback.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetCallback.java
@ -1,6 +1,6 @@
 /**
 *******************************************************************************
-* Copyright (C) 2006-2008, International Business Machines Corporation and    *
+* Copyright (C) 2006-2009, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
@ -332,10 +332,7 @@ public class CharsetCallback {
                    }
                }
            }
-            
-            /* reset the error */
-            cr = CoderResult.UNDERFLOW;
-            
+
            cr = encoder.cbFromUWriteUChars(encoder, CharBuffer.wrap(valueString, 0, valueStringLength), target, offsets);
            return cr;
        }
@ -356,7 +353,7 @@ public class CharsetCallback {
            if (context == null || !(context instanceof String)) {
                while (i < length) {
                    uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT;   /* adding % */
-                    uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT;              /* adding X */
+                    uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT;              /* adding X */
                    valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
                }
            } else {
@ -376,9 +373,11 @@ public class CharsetCallback {
                        uniValueString[valueStringLength++] = UNICODE_SEMICOLON_CODEPOINT;  /* adding ; */
                    }
                } else if (((String)context).equals(ESCAPE_C)) {
-                    uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT;   /* adding % */
-                    uniValueString[valueStringLength++] = UNICODE_X_CODEPOINT;              /* adding X */
-                    valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
+                    while (i < length) {
+                        uniValueString[valueStringLength++] = UNICODE_RS_CODEPOINT;         /* adding \ */
+                        uniValueString[valueStringLength++] = UNICODE_X_LOW_CODEPOINT;      /* adding x */
+                        valueStringLength += itou(uniValueString, valueStringLength, buffer[i++] & UConverterConstants.UNSIGNED_BYTE_MASK, 16, 2);
+                    }
                } else {
                    while (i < length) {
                        uniValueString[valueStringLength++] = UNICODE_PERCENT_SIGN_CODEPOINT;   /* adding % */
@ -388,10 +387,8 @@ public class CharsetCallback {
                    }
                }
            }
-            /* reset the error */
-            cr = CoderResult.UNDERFLOW;
            
-            CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0);
+            cr = CharsetDecoderICU.toUWriteUChars(decoder, uniValueString, 0, valueStringLength, target, offsets, 0);
            
            return cr;
        }
--- a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java
@ -1,6 +1,6 @@
 /**
 *******************************************************************************
-* Copyright (C) 2006-2008, International Business Machines Corporation and    *
+* Copyright (C) 2006-2009, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
@ -502,6 +502,8 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
    
                        //UConverterUtility.uprv_memcpy(replayArray, replayBegin, preToUArray, preToUBegin, -preToULength);
                        replayArray.put(preToUArray,0, -preToULength);
+                        // reset position
+                        replayArray.position(0);

                        source=replayArray;
                        source.limit(replayArrayIndex-preToULength);
@ -649,7 +651,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{

    private void copy(byte[] src, int srcOffset, char[] dst, int dstOffset, int length) {
        for(int i=srcOffset; i<length; i++){
-            dst[dstOffset++]=(char)src[srcOffset++];
+            dst[dstOffset++]=(char)(src[srcOffset++] & UConverterConstants.UNSIGNED_BYTE_MASK);
        }
    }
    /*
--- a/icu4j/src/com/ibm/icu/charset/CharsetHZ.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetHZ.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
- * Copyright (C) 2008, International Business Machines Corporation and         *
+ * Copyright (C) 2008-2009, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@ -58,6 +58,7 @@ class CharsetHZ extends CharsetICU {
        }

        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
+            CoderResult err = CoderResult.UNDERFLOW;
            byte[] tempBuf = new byte[2];
            int targetUniChar = 0;
            int mySourceChar = 0;
@ -104,10 +105,25 @@ class CharsetHZ extends CharsetICU {
                             * if the first byte is equal to TILDE and the trail byte is not a valid byte then it is an
                             * error condition
                             */
-                            mySourceChar |= 0x7e00;
-                            targetUniChar = 0xffff;
-                            isEmptySegment = false; /* different error here, reset this to avoid spurious future error */ 
-                            break;
+                            /*
+                             * Ticket 5691: consistent illegal sequences:
+                             * - We include at least the first byte in the illegal sequence.
+                             * - If any of the non-initial bytes could be the start of a character,
+                             *   we stop the illegal sequence before the first one of those.
+                             */
+                            isEmptySegment = false; /* different error here, reset this to avoid spurious future error */
+                            err = CoderResult.malformedForLength(1);
+                            toUBytesArray[0] = UCNV_TILDE;
+                            if (isStateDBCS ? (0x21 <= mySourceChar && mySourceChar <= 0x7e) : mySourceChar <= 0x7f) {
+                                /* The current byte could be the start of a character: Back it out. */
+                                toULength = 1;
+                                source.position(source.position() - 1);
+                            } else {
+                                /* Include the current byte in the illegal sequence. */
+                                toUBytesArray[1] = (byte)mySourceChar;
+                                toULength = 2;
+                            }
+                            return err;
                        }
                    } else if (isStateDBCS) {
                        if (toUnicodeStatus == 0) {
@ -124,19 +140,36 @@ class CharsetHZ extends CharsetICU {
                            continue;
                        } else {
                            /* trail byte */
+                            boolean leadIsOk, trailIsOk;
                            int leadByte = toUnicodeStatus & 0xff;
-                            if (0x21 <= leadByte && leadByte <= 0x7d && 0x21 <= mySourceChar && mySourceChar <= 0x7e) {
-                                tempBuf[0] = (byte) (leadByte + 0x80);
-                                tempBuf[1] = (byte) (mySourceChar + 0x80);
-                                targetUniChar = gbDecoder.simpleGetNextUChar(ByteBuffer.wrap(tempBuf), super.isFallbackUsed());
-                            } else {
-                                targetUniChar = 0xffff;
-                            }
+                            targetUniChar = 0xffff;
                            /*
-                             * add another bit so that the code below writes 2 bytes in case of error
+                             * Ticket 5691: consistent illegal sequence
+                             * - We include at least the first byte in the illegal sequence.
+                             * - If any of the non-initial bytes could be the start of a character,
+                             *   we stop the illegal sequence before the first one of those
+                             * 
+                             * In HZ DBCS, if the second byte is in the 21..7e range,
+                             * we report only the first byte as the illegal sequence.
+                             * Otherwise we convert or report the pair of bytes.
                             */
-                            mySourceChar |= 0x10000 | (leadByte << 8);
-                            toUnicodeStatus = 0;
+                            leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (leadByte - 0x21)) <= (0x7d - 0x21);
+                            trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
+                            if (leadIsOk && trailIsOk) {
+                                tempBuf[0] = (byte)(leadByte + 0x80);
+                                tempBuf[1] = (byte)(mySourceChar + 0x80);
+                                targetUniChar = gbDecoder.simpleGetNextUChar(ByteBuffer.wrap(tempBuf), super.isFallbackUsed());
+                                mySourceChar = (leadByte << 8) | mySourceChar;
+                            } else if (trailIsOk) {
+                                /* report a single illegal byte and continue with the following DBCS starter byte */
+                                source.position(source.position() - 1);
+                                mySourceChar = (int)leadByte;
+                            } else {
+                                /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                                /* add another bit so that the code below writes 2 bytes in case of error */
+                                mySourceChar = 0x10000 | (leadByte << 8) | mySourceChar;
+                            }
+                            toUnicodeStatus = 0x00;
                        }
                    } else {
                        if (mySourceChar == UCNV_TILDE) {
@ -177,7 +210,7 @@ class CharsetHZ extends CharsetICU {
                }
            }

-            return CoderResult.UNDERFLOW;
+            return err;
        }
    }

--- a/icu4j/src/com/ibm/icu/charset/CharsetISO2022.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetISO2022.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
- * Copyright (C) 2008, International Business Machines Corporation and         *
+ * Copyright (C) 2008-2009, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
@ -200,10 +200,12 @@ class CharsetISO2022 extends CharsetICU {
    }
    
    /*
+     * Commented out because Ticket 5691: Call sites now check for validity. They can just += 0x8080 after that. 
+     * 
     * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
     * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
     * unchanged. 
-     */
+     * 
    private static int _2022ToGR94DBCS(int value) {
        int returnValue = value + 0x8080;
        
@ -213,7 +215,7 @@ class CharsetISO2022 extends CharsetICU {
        } else {
            return value;
        }
-    }
+    }*/
    
    /* is the StateEnum charset value for a DBCS charset? */
    private static boolean IS_JP_DBCS(byte cs) {
@ -528,6 +530,7 @@ class CharsetISO2022 extends CharsetICU {
        byte value;
        int key[] = {myConverterData.key};
        int offset[] = {0};
+        int initialToULength = decoder.toULength;
        byte c;
        int malformLength = 0;
        
@ -571,7 +574,7 @@ class CharsetISO2022 extends CharsetICU {
            /* indicate that the escape sequence is incomplete: key !=0 */
            return err;
        } else if (value == INVALID_2022) {
-            return CoderResult.malformedForLength(malformLength);
+            err = CoderResult.malformedForLength(malformLength);
        } else /* value == VALID_TERMINAL_2022 */ {
            switch (var) {
            case ISO_2022_JP: {
@ -679,7 +682,39 @@ class CharsetISO2022 extends CharsetICU {
        }
        if (!err.isError()) {
            decoder.toULength = 0;
+        } else if (err.isMalformed()) {
+            if (decoder.toULength > 1) {
+                /*
+                 * Ticket 5691: consistent illegal sequences:
+                 * - We include at least the first byte (ESC) in the illegal sequence.
+                 * - If any of the non-initial bytes could be the start of a character,
+                 *   we stop the illegal sequence before the first one of those.
+                 *   In escape sequences, all following bytes are "printable", that is,
+                 *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
+                 *   they are valid single/lead bytes.
+                 *   For simplicity, we always only report the initial ESC byte as the
+                 *   illegal sequence and back out all other bytes we looked at.
+                 */
+                /* Back out some bytes. */
+                int backOutDistance = decoder.toULength - 1;
+                int bytesFromThisBuffer = decoder.toULength - initialToULength;
+                if (backOutDistance <= bytesFromThisBuffer) {
+                    /* same as initialToULength<=1 */
+                    source.position(source.position() - backOutDistance);
+                } else {
+                    /* Back out bytes from the previous buffer: Need to replay them. */
+                    decoder.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
+                    /* same as -(initialToULength-1) */
+                    /* preToULength is negative! */
+                    for (int i = 0; i < -(decoder.preToULength); i++) {
+                        decoder.preToUArray[i] = decoder.toUBytesArray[i+1];
+                    }
+                    source.position(source.position() - bytesFromThisBuffer);
+                }
+                decoder.toULength = 1;
+            }
        }
+        
        return err;
    }
    
@ -820,7 +855,7 @@ class CharsetISO2022 extends CharsetICU {
                gotoEscape = true;
            } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
                /* continue with a partial double-byte character */
-                mySourceChar = toUBytesArray[0];
+                mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
                toULength = 0;
                cs = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
                // goto getTrailByte;
@ -838,7 +873,7 @@ class CharsetISO2022 extends CharsetICU {
                
                if (gotoEscape || gotoGetTrail || target.hasRemaining()) {
                    if (!gotoEscape && !gotoGetTrail) {
-                        mySourceChar = UConverterConstants.UNSIGNED_BYTE_MASK & source.get();
+                        mySourceChar = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
                        mySourceCharTemp = mySourceChar;
                    }
                    
@ -963,26 +998,48 @@ class CharsetISO2022 extends CharsetICU {
 // getTrailByte:
                                    int tmpSourceChar;
                                    gotoGetTrail = false;
-                                    byte trailByte;
-                                    trailByte = source.get();
-                                    tmpSourceChar = (mySourceChar << 8) | (short)(UConverterConstants.UNSIGNED_BYTE_MASK & trailByte);
-                                    if (cs == JISX208) {
-                                        _2022ToSJIS((char)(UConverterConstants.UNSIGNED_BYTE_MASK & mySourceChar), 
-                                                (char)(UConverterConstants.UNSIGNED_BYTE_MASK & trailByte), tempBuf);
-                                    } else {
-                                        if (cs == KSC5601) {
-                                            tmpSourceChar = _2022ToGR94DBCS(tmpSourceChar);
+                                    short trailByte;
+                                    boolean leadIsOk, trailIsOk;
+                                    
+                                    trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                                    /*
+                                     * Ticket 5691: consistent illegal sequences:
+                                     * - We include at least the first byte in the illegal sequence.
+                                     * - If any of the non-initial bytes could be the start of a character,
+                                     *   we stop the illegal sequence before the first one of those.
+                                     * 
+                                     * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+                                     * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+                                     * Otherwise we convert or report the pair of bytes.
+                                     */
+                                    leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
+                                    trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
+                                    if (leadIsOk && trailIsOk) {
+                                        source.get();
+                                        tmpSourceChar = (mySourceChar << 8) | trailByte;
+                                        if (cs == JISX208) {
+                                            _2022ToSJIS((char)mySourceChar, (char)trailByte, tempBuf);
+                                            mySourceChar = tmpSourceChar;
+                                        } else {
+                                            /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
+                                            mySourceChar = tmpSourceChar;
+                                            if (cs == KSC5601) {
+                                                tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
+                                            }
+                                            tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));
+                                            tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);
                                        }
-                                        tempBuf[0] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & (tmpSourceChar >> 8));
-                                        tempBuf[1] = (byte)(UConverterConstants.UNSIGNED_BYTE_MASK & tmpSourceChar);
+                                        targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], ByteBuffer.wrap(tempBuf), false);
+                                    } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+                                        /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                                        source.get();
+                                        /* add another bit so that the code below writes 2 bytes in case of error */
+                                        mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
                                    }
-                                    ByteBuffer tempByteBuf = ByteBuffer.wrap(tempBuf);
-                                    targetUniChar = MBCSSimpleGetNextUChar(myConverterData.myConverterArray[cs], tempByteBuf, false);
-                                    mySourceChar = tmpSourceChar;
                                } else {
                                    toUBytesArray[0] = (byte)mySourceChar;
                                    toULength = 1;
-                                    // goto endloop;
+                                    // goto endloop
                                    return err;
                                }
                            } /* end of inner switch */
@ -1056,8 +1113,9 @@ class CharsetISO2022 extends CharsetICU {
                gotoEscape = true;
            } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
                /* continue with a partial double-byte character */
-                mySourceChar = toUBytesArray[0];
+                mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
                toULength = 0;
+                targetUniChar = UConverterConstants.missingCharMarker;
                // goto getTrailByte
                gotoGetTrailByte = true;
            }
@ -1139,36 +1197,58 @@ class CharsetISO2022 extends CharsetICU {
                                UConverterSharedData cnv;
                                byte tempState;
                                int tempBufLen;
-                                byte trailByte;
+                                boolean leadIsOk, trailIsOk;
+                                short trailByte;
 // getTrailByte: label
                                gotoGetTrailByte = false; // reset gotoGetTrailByte
                                
-                                trailByte = source.get();
-                                tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
-                                if (tempState > CNS_11643_0) {
-                                    cnv = myConverterData.myConverterArray[CNS_11643];
-                                    tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));
-                                    tempBuf[1] = (byte)(mySourceChar);
-                                    tempBuf[2] = trailByte;
-                                    tempBufLen = 3;
-                                } else {
-                                    cnv = myConverterData.myConverterArray[tempState];
-                                    tempBuf[0] = (byte)(mySourceChar);
-                                    tempBuf[1] = trailByte;
-                                    tempBufLen = 2;
+                                trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                                /*
+                                 * Ticket 5691: consistent illegal sequences:
+                                 * - We include at least the first byte in the illegal sequence.
+                                 * - If any of the non-initial bytes could be the start of a character,
+                                 *   we stop the illegal sequence before the first one of those.
+                                 * 
+                                 * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+                                 * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+                                 * Otherwise we convert or report the pair of bytes.
+                                 */
+                                leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
+                                trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
+                                if (leadIsOk && trailIsOk) {
+                                    source.get();
+                                    tempState = myConverterData.toU2022State.cs[myConverterData.toU2022State.g];
+                                    if (tempState > CNS_11643_0) {
+                                        cnv = myConverterData.myConverterArray[CNS_11643];
+                                        tempBuf[0] = (byte)(0x80 + (tempState - CNS_11643_0));
+                                        tempBuf[1] = (byte)mySourceChar;
+                                        tempBuf[2] = (byte)trailByte;
+                                        tempBufLen = 3;
+                                    } else {
+                                        cnv = myConverterData.myConverterArray[tempState];
+                                        tempBuf[0] = (byte)mySourceChar;
+                                        tempBuf[1] = (byte)trailByte;
+                                        tempBufLen = 2;
+                                    }
+                                    ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);
+                                    tempBuffer.limit(tempBufLen);
+                                    targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
+                                    mySourceChar = (mySourceChar << 8) | trailByte;
+                                    
+                                } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+                                    /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                                    source.get();
+                                    /* add another bit so that the code below writes 2 bytes in case of error */
+                                    mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
                                }
-                                mySourceChar = (mySourceChar << 8) | (UConverterConstants.UNSIGNED_BYTE_MASK & trailByte);
                                if (myConverterData.toU2022State.g >= 2) {
                                    /* return from a single-shift state to the previous one */
                                    myConverterData.toU2022State.g = myConverterData.toU2022State.prevG;
                                }
-                                ByteBuffer tempBuffer = ByteBuffer.wrap(tempBuf);
-                                tempBuffer.limit(tempBufLen);
-                                tempBuffer.position(0);
-                                targetUniChar = MBCSSimpleGetNextUChar(cnv, tempBuffer, false);
                            } else {
                                toUBytesArray[0] = (byte)mySourceChar;
                                toULength = 1;
+                                // goto endloop;
                                return err;
                            }
                        } else {
@ -1228,7 +1308,7 @@ class CharsetISO2022 extends CharsetICU {
        
        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult err = CoderResult.UNDERFLOW;
-            char mySourceChar = 0x0000;
+            int mySourceChar = 0x0000;
            int targetUniChar = 0x0000;
            byte[] tempBuf = new byte[2];
            boolean usingFallback;
@ -1247,7 +1327,7 @@ class CharsetISO2022 extends CharsetICU {
                gotoEscape = true;
            } else if (toULength == 1 && source.hasRemaining() && target.hasRemaining()) {
                /* continue with a partial double-byte character */
-                mySourceChar = (char)toUBytesArray[0];
+                mySourceChar = (toUBytesArray[0] & UConverterConstants.UNSIGNED_BYTE_MASK);
                toULength = 0;
                gotoGetTrailByte = true;
            }
@ -1255,7 +1335,7 @@ class CharsetISO2022 extends CharsetICU {
            while (source.hasRemaining() || gotoGetTrailByte || gotoEscape) {
                if (target.hasRemaining() || gotoGetTrailByte || gotoEscape) {
                    if (!gotoGetTrailByte && !gotoEscape) {
-                        mySourceChar = (char)(source.get()&UConverterConstants.UNSIGNED_BYTE_MASK);
+                        mySourceChar = (char)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK);
                    }
                    
                    if (!gotoGetTrailByte && !gotoEscape && mySourceChar == UConverterConstants.SI) {
@ -1290,31 +1370,52 @@ class CharsetISO2022 extends CharsetICU {
                    myConverterData.isEmptySegment = false; /* Any invalid char errors will be detected separately, so just reset this */
                    if (myConverterData.toU2022State.g == 1 || gotoGetTrailByte) {
                        if (source.hasRemaining() || gotoGetTrailByte) {
+                            boolean leadIsOk, trailIsOk;
+                            short trailByte;
 // getTrailByte label
                            gotoGetTrailByte = false; // reset gotoGetTrailByte flag
                            
-                            byte trailByte;
-                            trailByte = source.get();
-                            tempBuf[0] = (byte)(mySourceChar + 0x80);
-                            tempBuf[1] = (byte)(trailByte + 0x80);
-                            mySourceChar = (char)((mySourceChar << 8) | (short)(trailByte&UConverterConstants.UNSIGNED_BYTE_MASK));
-                            if ((mySourceChar & 0x8080) == 0) {
+                            trailByte = (short)(source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                            targetUniChar = UConverterConstants.missingCharMarker;
+                            /*
+                             * Ticket 5691: consistent illegal sequences:
+                             * - We include at least the first byte in the illegal sequence.
+                             * - If any of the non-initial bytes could be the start of a character,
+                             *   we stop the illegal sequence before the first one of those.
+                             * 
+                             * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+                             * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+                             * Otherwise we convert or report the pair of bytes.
+                             */
+                            leadIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (mySourceChar - 0x21)) <= (0x7e - 0x21);
+                            trailIsOk = (short)(UConverterConstants.UNSIGNED_BYTE_MASK & (trailByte - 0x21)) <= (0x7e - 0x21);
+                            if (leadIsOk && trailIsOk) {
+                                source.get();
+                                tempBuf[0] = (byte)(mySourceChar + 0x80);
+                                tempBuf[1] = (byte)(trailByte + 0x80);
                                targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, ByteBuffer.wrap(tempBuf), usingFallback);
-                            } else {
-                                /* illegal bytes > 0x7f */
-                                targetUniChar = UConverterConstants.missingCharMarker;
+                                mySourceChar = (char)((mySourceChar << 8) | trailByte);
+                            } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+                                /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                                source.get();
+                                /* add another bit so that the code below writes 2 bytes in case of error */
+                                mySourceChar = (char)(0x10000 | (mySourceChar << 8) | trailByte);
                            }
                        } else {
                            toUBytesArray[0] = (byte)mySourceChar;
                            toULength = 1;
                            break;
                        }
-                    } else {
-                        int oldSourceLimit = source.limit();
+                    } else if (mySourceChar <= 0x7f) {
+                        int savedSourceLimit = source.limit();
+                        int savedSourcePosition = source.position();
                        source.limit(source.position());
                        source.position(source.position()-1); 
                        targetUniChar = MBCSSimpleGetNextUChar(myConverterData.currentConverter.sharedData, source, usingFallback);
-                        source.limit(oldSourceLimit);
+                        source.limit(savedSourceLimit);
+                        source.position(savedSourcePosition);
+                    } else {
+                        targetUniChar = 0xffff;
                    }
                    if (targetUniChar < 0xfffe) {
                        target.put((char)targetUniChar);
@ -1412,7 +1513,7 @@ class CharsetISO2022 extends CharsetICU {
                        }
                    }
                    
-                    if (err.isError() || (source.position() == source.limit())) {
+                    if (err.isError() || err.isOverflow() || (source.position() == source.limit())) {
                        return err;
                    }
                }
@ -2580,7 +2681,11 @@ class CharsetISO2022 extends CharsetICU {
                        }
                        /* only DBCS or SBCS characters are expected */
                        /* DB characters with high bit set to 1 are expected */
-                        if (length > 2 || length == 0 || (((targetByteUnit[0] & 0x8080) != 0x8080) && length == 2)) {
+                        if (length > 2 || length == 0 ||
+                                (length == 1 && targetByteUnit[0] > 0x7f) ||
+                                (length ==2 &&
+                                        ((char)(targetByteUnit[0] - 0xa1a1) > (0xfefe - 0xa1a1) ||
+                                        ((targetByteUnit[0] - 0xa1) & UConverterConstants.UNSIGNED_BYTE_MASK) > (0xfe - 0xa1)))) {
                            targetByteUnit[0] = UConverterConstants.missingCharMarker;
                        }
                    }
--- a/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetMBCS.java
@ -1,6 +1,6 @@
 /**
 *******************************************************************************
- * Copyright (C) 2006-2008, International Business Machines Corporation and    *
+ * Copyright (C) 2006-2009, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
@ -1573,330 +1573,8 @@ class CharsetMBCS extends CharsetICU {
        }

        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
-            CoderResult[] cr = { CoderResult.UNDERFLOW };
-
-            int sourceArrayIndex;
-            int stateTable[][/* 256 */];
-            char[] unicodeCodeUnits;
-
-            int offset;
-            byte state;
-            int byteIndex;
-            byte[] bytes;
-
-            int sourceIndex, nextSourceIndex;
-
-            int entry = 0;
-            char c;
-            byte action;
-
-            if (preToULength > 0) {
-                /*
-                 * pass sourceIndex=-1 because we continue from an earlier buffer in the future, this may change with
-                 * continuous offsets
-                 */
-                cr[0] = continueMatchToU(source, target, offsets, -1, flush);
-
-                if (cr[0].isError() || preToULength < 0) {
-                    return cr[0];
-                }
-            }
-
-            if (sharedData.mbcs.countStates == 1) {
-                if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
-                    cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
-                } else {
-                    cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
-                }
-                return cr[0];
-            }
-
-            /* set up the local pointers */
-            sourceArrayIndex = source.position();
-
-            if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
-                stateTable = sharedData.mbcs.swapLFNLStateTable;
-            } else {
-                stateTable = sharedData.mbcs.stateTable;
-            }
-            unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
-
-            /* get the converter state from UConverter */
-            offset = (int) toUnicodeStatus;
-            byteIndex = toULength;
-            bytes = toUBytesArray;
-
-            /*
-             * if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data
-             * (dbcsOnlyState==0 if it is not a DBCS-only converter)
-             */
-            state = (byte)mode;
-            if (state == 0) {
-                state = sharedData.mbcs.dbcsOnlyState;
-            }
-
-            /* sourceIndex=-1 if the current character began in the previous buffer */
-            sourceIndex = byteIndex == 0 ? 0 : -1;
-            nextSourceIndex = 0;
-
-            /* conversion loop */
-            while (sourceArrayIndex < source.limit()) {
-                /*
-                 * This following test is to see if available input would overflow the output. It does not catch output
-                 * of more than one code unit that overflows as a result of a surrogate pair or callback output from the
-                 * last source byte. Therefore, those situations also test for overflows and will then break the loop,
-                 * too.
-                 */
-                if (!target.hasRemaining()) {
-                    /* target is full */
-                    cr[0] = CoderResult.OVERFLOW;
-                    break;
-                }
-
-                if (byteIndex == 0) {
-                    /* optimized loop for 1/2-byte input and BMP output */
-                    // agljport:todo see ucnvmbcs.c for deleted block
-                    do {
-                        entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];
-                        if (MBCS_ENTRY_IS_TRANSITION(entry)) {
-                            state = (byte) MBCS_ENTRY_TRANSITION_STATE(entry);
-                            offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
-                            ++sourceArrayIndex;
-                            if (sourceArrayIndex < source.limit()
-                                    && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])
-                                    && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
-                                    && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
-                                ++sourceArrayIndex;
-                                target.put(c);
-                                if (offsets != null) {
-                                    offsets.put(sourceIndex);
-                                    sourceIndex = (nextSourceIndex += 2);
-                                }
-                                state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
-                                offset = 0;
-                            } else {
-                                /* set the state and leave the optimized loop */
-                                ++nextSourceIndex;
-                                bytes[0] = source.get(sourceArrayIndex - 1);
-                                byteIndex = 1;
-                                break;
-                            }
-                        } else {
-                            if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
-                                /* output BMP code point */
-                                ++sourceArrayIndex;
-                                target.put((char) MBCS_ENTRY_FINAL_VALUE_16(entry));
-                                if (offsets != null) {
-                                    offsets.put(sourceIndex);
-                                    sourceIndex = ++nextSourceIndex;
-                                }
-                                state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
-                            } else {
-                                /* leave the optimized loop */
-                                break;
-                            }
-                        }
-                    } while (sourceArrayIndex < source.limit() && target.hasRemaining());
-                    /*
-                     * these tests and break statements could be put inside the loop if C had "break outerLoop" like
-                     * Java
-                     */
-                    if (sourceArrayIndex >= source.limit()) {
-                        break;
-                    }
-                    if (!target.hasRemaining()) {
-                        /* target is full */
-                        cr[0] = CoderResult.OVERFLOW;
-                        break;
-                    }
-
-                    ++nextSourceIndex;
-                    bytes[byteIndex++] = source.get(sourceArrayIndex++);
-                } else /* byteIndex>0 */{
-                    ++nextSourceIndex;
-                    entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++))
-                            & UConverterConstants.UNSIGNED_BYTE_MASK];
-                }
-
-                if (MBCS_ENTRY_IS_TRANSITION(entry)) {
-                    state = (byte) MBCS_ENTRY_TRANSITION_STATE(entry);
-                    offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
-                    continue;
-                }
-
-                /* save the previous state for proper extension mapping with SI/SO-stateful converters */
-                mode = state;
-
-                /* set the next state early so that we can reuse the entry variable */
-                state = (byte) MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
-
-                /*
-                 * An if-else-if chain provides more reliable performance for the most common cases compared to a
-                 * switch.
-                 */
-                action = (byte) (MBCS_ENTRY_FINAL_ACTION(entry));
-                if (action == MBCS_STATE_VALID_16) {
-                    offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
-                    c = unicodeCodeUnits[offset];
-                    if (c < 0xfffe) {
-                        /* output BMP code point */
-                        target.put(c);
-                        if (offsets != null) {
-                            offsets.put(sourceIndex);
-                        }
-                        byteIndex = 0;
-                    } else if (c == 0xfffe) {
-                        if (isFallbackUsed() && (entry = (int) getFallback(sharedData.mbcs, offset)) != 0xfffe) {
-                            /* output fallback BMP code point */
-                            target.put((char) entry);
-                            if (offsets != null) {
-                                offsets.put(sourceIndex);
-                            }
-                            byteIndex = 0;
-                        }
-                    } else {
-                        /* callback(illegal) */
-                        cr[0] = CoderResult.malformedForLength(byteIndex);
-                    }
-                } else if (action == MBCS_STATE_VALID_DIRECT_16) {
-                    /* output BMP code point */
-                    target.put((char) MBCS_ENTRY_FINAL_VALUE_16(entry));
-                    if (offsets != null) {
-                        offsets.put(sourceIndex);
-                    }
-                    byteIndex = 0;
-                } else if (action == MBCS_STATE_VALID_16_PAIR) {
-                    offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
-                    c = unicodeCodeUnits[offset++];
-                    if (c < 0xd800) {
-                        /* output BMP code point below 0xd800 */
-                        target.put(c);
-                        if (offsets != null) {
-                            offsets.put(sourceIndex);
-                        }
-                        byteIndex = 0;
-                    } else if (isFallbackUsed() ? c <= 0xdfff : c <= 0xdbff) {
-                        /* output roundtrip or fallback surrogate pair */
-                        target.put((char) (c & 0xdbff));
-                        if (offsets != null) {
-                            offsets.put(sourceIndex);
-                        }
-                        byteIndex = 0;
-                        if (target.hasRemaining()) {
-                            target.put(unicodeCodeUnits[offset]);
-                            if (offsets != null) {
-                                offsets.put(sourceIndex);
-                            }
-                        } else {
-                            /* target overflow */
-                            charErrorBufferArray[0] = unicodeCodeUnits[offset];
-                            charErrorBufferLength = 1;
-                            cr[0] = CoderResult.OVERFLOW;
-
-                            offset = 0;
-                            break;
-                        }
-                    } else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
-                        /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
-                        target.put(unicodeCodeUnits[offset]);
-                        if (offsets != null) {
-                            offsets.put(sourceIndex);
-                        }
-                        byteIndex = 0;
-                    } else if (c == 0xffff) {
-                        /* callback(illegal) */
-                        cr[0] = CoderResult.malformedForLength(byteIndex);
-                    }
-                } else if (action == MBCS_STATE_VALID_DIRECT_20
-                        || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
-                    entry = MBCS_ENTRY_FINAL_VALUE(entry);
-                    /* output surrogate pair */
-                    target.put((char) (0xd800 | (char) (entry >> 10)));
-                    if (offsets != null) {
-                        offsets.put(sourceIndex);
-                    }
-                    byteIndex = 0;
-                    c = (char) (0xdc00 | (char) (entry & 0x3ff));
-                    if (target.hasRemaining()) {
-                        target.put(c);
-                        if (offsets != null) {
-                            offsets.put(sourceIndex);
-                        }
-                    } else {
-                        /* target overflow */
-                        charErrorBufferArray[0] = c;
-                        charErrorBufferLength = 1;
-                        cr[0] = CoderResult.OVERFLOW;
-
-                        offset = 0;
-                        break;
-                    }
-                } else if (action == MBCS_STATE_CHANGE_ONLY) {
-                    /*
-                     * This serves as a state change without any output. It is useful for reading simple stateful
-                     * encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used
-                     * for more sophisticated state transitions.
-                     */
-                    if (sharedData.mbcs.dbcsOnlyState == 0) {
-                        byteIndex = 0;
-                    } else {
-                        /* SI/SO are illegal for DBCS-only conversion */
-                        state = (byte) (mode); /* restore the previous state */
-
-                        /* callback(illegal) */
-                        cr[0] = CoderResult.malformedForLength(byteIndex);
-                    }
-                } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
-                    if (isFallbackUsed()) {
-                        /* output BMP code point */
-                        target.put((char) MBCS_ENTRY_FINAL_VALUE_16(entry));
-                        if (offsets != null) {
-                            offsets.put(sourceIndex);
-                        }
-                        byteIndex = 0;
-                    }
-                } else if (action == MBCS_STATE_UNASSIGNED) {
-                    /* just fall through */
-                } else if (action == MBCS_STATE_ILLEGAL) {
-                    /* callback(illegal) */
-                    cr[0] = CoderResult.malformedForLength(byteIndex);
-                } else {
-                    /* reserved, must never occur */
-                    byteIndex = 0;
-                }
-
-                /* end of action codes: prepare for a new character */
-                offset = 0;
-
-                if (byteIndex == 0) {
-                    sourceIndex = nextSourceIndex;
-                } else if (cr[0].isError()) {
-                    /* callback(illegal) */
-                    break;
-                } else /* unassigned sequences indicated with byteIndex>0 */{
-                    /* try an extension mapping */
-                    int sourceBeginIndex = sourceArrayIndex;
-                    source.position(sourceArrayIndex);
-                    byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);
-                    sourceArrayIndex = source.position();
-                    sourceIndex = nextSourceIndex + (int) (sourceArrayIndex - sourceBeginIndex);
-
-                    if (cr[0].isError() || cr[0].isOverflow()) {
-                        /* not mappable or buffer overflow */
-                        break;
-                    }
-                }
-            }
-
-            /* set the converter state back into UConverter */
-            toUnicodeStatus = offset;
-            mode = state;
-            toULength = byteIndex;
-
-            /* write back the updated pointers */
-            source.position(sourceArrayIndex);
-
-            return cr[0];
+            /* Delegate to cnvMBCSToUnicodeWithOffsets() so that the conversion loop is not duplicated here. */
+            return cnvMBCSToUnicodeWithOffsets(source, target, offsets, flush);
        }

        /*
@ -2253,132 +1931,134 @@ class CharsetMBCS extends CharsetICU {

        CoderResult cnvMBCSToUnicodeWithOffsets(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) {
            CoderResult[] cr = { CoderResult.UNDERFLOW };
-            
-            int[][] stateTable;
+
+            int sourceArrayIndex, sourceArrayIndexStart;
+            int stateTable[][/* 256 */];
            char[] unicodeCodeUnits;
-            
-            int sourceIndex, nextSourceIndex;
-            
+
            int offset;
-            short state;
+            byte state;
            int byteIndex;
            byte[] bytes;
-            
-            int entry;
+
+            int sourceIndex, nextSourceIndex;
+
+            int entry = 0;
            char c;
-            short action;
-            
-            if (this.preToULength > 0) {
+            byte action;
+
+            if (preToULength > 0) {
                /*
-                 * pass sourceIndex-1 because we continue from an earlier buffer
-                 * in the future, this may change with continuous offsets
+                 * pass sourceIndex=-1 because we continue from an earlier buffer; in the future, this may change
+                 * with continuous offsets
                 */
                cr[0] = continueMatchToU(source, target, offsets, -1, flush);
-                if (cr[0].isError() || this.preToULength < 0) {
+
+                if (cr[0].isError() || preToULength < 0) {
                    return cr[0];
                }
            }
-            
+
            if (sharedData.mbcs.countStates == 1) {
-                if ((sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
+                if ((sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
                    cr[0] = cnvMBCSSingleToBMPWithOffsets(source, target, offsets, flush);
                } else {
                    cr[0] = cnvMBCSSingleToUnicodeWithOffsets(source, target, offsets, flush);
                }
                return cr[0];
            }
-            
-            if ((options&UConverterConstants.OPTION_SWAP_LFNL) != 0) {
+
+            /* set up the local pointers */
+            sourceArrayIndex = sourceArrayIndexStart = source.position();
+
+            if ((options & UConverterConstants.OPTION_SWAP_LFNL) != 0) {
                stateTable = sharedData.mbcs.swapLFNLStateTable;
            } else {
                stateTable = sharedData.mbcs.stateTable;
            }
            unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;
-            
+
            /* get the converter state from UConverter */
-            offset = this.toUnicodeStatus;
-            byteIndex = this.toULength;
-            bytes = this.toUBytesArray;
-            
+            offset = (int)toUnicodeStatus;
+            byteIndex = toULength;
+            bytes = toUBytesArray;
+
            /*
-             * if we are in the SBCS state for a DBCS-only converter,
-             * then load the DBCS state from the MBCS data
+             * if we are in the SBCS state for a DBCS-only converter, then load the DBCS state from the MBCS data
             * (dbcsOnlyState==0 if it is not a DBCS-only converter)
             */
-            state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&this.mode);
+            state = (byte)mode;
            if (state == 0) {
                state = sharedData.mbcs.dbcsOnlyState;
            }
-            
-            /* sourceIndex=-1 if the current character begain in the previous buffer */
+
+            /* sourceIndex=-1 if the current character began in the previous buffer */
            sourceIndex = byteIndex == 0 ? 0 : -1;
            nextSourceIndex = 0;
-            
+
            /* conversion loop */
-            while (source.hasRemaining()) {
+            while (sourceArrayIndex < source.limit()) {
                /*
-                 * This following test is to see if available input would overflow the output.
-                 * It does not catch output of more than one code unit that
-                 * overflows as a result of a surrogate pair or callback output
-                 * from the last source byte.
-                 * Therefore, those situations also test for overflows and will
-                 * then break the loop, too.
+                 * The following test checks whether the available input would overflow the output. It does not catch
+                 * output of more than one code unit that overflows as a result of a surrogate pair or callback output
+                 * from the last source byte. Therefore, those situations also test for overflows and then break the
+                 * loop, too.
                 */
                if (!target.hasRemaining()) {
                    /* target is full */
                    cr[0] = CoderResult.OVERFLOW;
                    break;
                }
-                
+
                if (byteIndex == 0) {
                    /* optimized loop for 1/2-byte input and BMP output */
+                    // agljport:todo see ucnvmbcs.c for deleted block
                    do {
-                        entry = stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK];
+                        entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK];
                        if (MBCS_ENTRY_IS_TRANSITION(entry)) {
-                            state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_TRANSITION_STATE(entry));
+                            state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
                            offset = MBCS_ENTRY_TRANSITION_OFFSET(entry);
-                            
-                            source.get();
-                            if (source.hasRemaining() &&
-                                    MBCS_ENTRY_IS_FINAL(entry=stateTable[state][(short)source.get(source.position()) & UConverterConstants.UNSIGNED_BYTE_MASK]) &&
-                                    MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16 &&
-                                    (c = unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
-                                source.get();
+                            ++sourceArrayIndex;
+                            if (sourceArrayIndex < source.limit()
+                                    && MBCS_ENTRY_IS_FINAL(entry = stateTable[state][source.get(sourceArrayIndex)&UConverterConstants.UNSIGNED_BYTE_MASK])
+                                    && MBCS_ENTRY_FINAL_ACTION(entry) == MBCS_STATE_VALID_16
+                                    && (c = unicodeCodeUnits[offset + MBCS_ENTRY_FINAL_VALUE_16(entry)]) < 0xfffe) {
+                                ++sourceArrayIndex;
                                target.put(c);
                                if (offsets != null) {
                                    offsets.put(sourceIndex);
-                                    sourceIndex = (nextSourceIndex + 2);
+                                    sourceIndex = (nextSourceIndex += 2);
                                }
-                                state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
+                                state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
                                offset = 0;
                            } else {
                                /* set the state and leave the optimized loop */
                                ++nextSourceIndex;
-                                bytes[0] = source.get(source.position()-1);
+                                bytes[0] = source.get(sourceArrayIndex - 1);
                                byteIndex = 1;
                                break;
                            }
                        } else {
                            if (MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
                                /* output BMP code point */
-                                source.get();
+                                ++sourceArrayIndex;
                                target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
                                if (offsets != null) {
                                    offsets.put(sourceIndex);
                                    sourceIndex = ++nextSourceIndex;
                                }
-                                state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
+                                state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
                            } else {
                                /* leave the optimized loop */
                                break;
                            }
                        }
-                    } while (source.hasRemaining() && target.hasRemaining());
-                    
-                    /* these tests and break statements could be put inside the loop
-                     * if C had "break outerLoop" like Java
+                    } while (sourceArrayIndex < source.limit() && target.hasRemaining());
+                    /*
+                     * Carried over from the C implementation: these tests and break statements could be put inside
+                     * the loop if C had "break outerLoop" like Java
                     */
-                    if (!source.hasRemaining()) {
+                    if (sourceArrayIndex >= source.limit()) {
                        break;
                    }
                    if (!target.hasRemaining()) {
@ -2386,31 +2066,32 @@ class CharsetMBCS extends CharsetICU {
                        cr[0] = CoderResult.OVERFLOW;
                        break;
                    }
-                    
+
                    ++nextSourceIndex;
-                    bytes[byteIndex++] = source.get();
-                } else { /* byteIndex>0 */
+                    bytes[byteIndex++] = source.get(sourceArrayIndex++);
+                } else /* byteIndex>0 */{
                    ++nextSourceIndex;
-                    entry = stateTable[state][(short)(bytes[byteIndex++]=source.get()) & UConverterConstants.UNSIGNED_BYTE_MASK];
+                    entry = stateTable[state][(bytes[byteIndex++] = source.get(sourceArrayIndex++))
+                            & UConverterConstants.UNSIGNED_BYTE_MASK];
                }
-                
+
                if (MBCS_ENTRY_IS_TRANSITION(entry)) {
-                    state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_TRANSITION_STATE(entry));
-                    offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
+                    state = (byte)MBCS_ENTRY_TRANSITION_STATE(entry);
+                    offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
                    continue;
                }
-                
+
                /* save the previous state for proper extension mapping with SI/SO-stateful converters */
                mode = state;
-                
+
                /* set the next state early so that we can reuse the entry variable */
-                state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_STATE(entry)); /* typically 0 */
-                
+                state = (byte)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */
+
                /*
-                 * An if-else-if chain provides more reliable performance for
-                 * the most common cases compared to a switch.
+                 * An if-else-if chain provides more reliable performance for the most common cases compared to a
+                 * switch.
                 */
-                action = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&MBCS_ENTRY_FINAL_ACTION(entry));
+                action = (byte)MBCS_ENTRY_FINAL_ACTION(entry);
                if (action == MBCS_STATE_VALID_16) {
                    offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
                    c = unicodeCodeUnits[offset];
@ -2422,7 +2103,7 @@ class CharsetMBCS extends CharsetICU {
                        }
                        byteIndex = 0;
                    } else if (c == 0xfffe) {
-                        if (CharsetDecoderICU.isToUUseFallback() && (entry = (int)getFallback(sharedData.mbcs, offset)) != 0xfffe) {
+                        if (isFallbackUsed() && (entry = (int)getFallback(sharedData.mbcs, offset)) != 0xfffe) {
                            /* output fallback BMP code point */
                            target.put((char)entry);
                            if (offsets != null) {
@ -2432,7 +2113,7 @@ class CharsetMBCS extends CharsetICU {
                        }
                    } else {
                        /* callback(illegal) */
-                        cr[0] = CoderResult.malformedForLength(1);
+                        cr[0] = CoderResult.malformedForLength(byteIndex);
                    }
                } else if (action == MBCS_STATE_VALID_DIRECT_16) {
                    /* output BMP code point */
@ -2451,9 +2132,9 @@ class CharsetMBCS extends CharsetICU {
                            offsets.put(sourceIndex);
                        }
                        byteIndex = 0;
-                    } else if (CharsetDecoderICU.isToUUseFallback() ? c<=0xdfff : c<=0xdbff) {
+                    } else if (isFallbackUsed() ? c <= 0xdfff : c <= 0xdbff) {
                        /* output roundtrip or fallback surrogate pair */
-                        target.put((char)(c&0xdbff));
+                        target.put((char)(c & 0xdbff));
                        if (offsets != null) {
                            offsets.put(sourceIndex);
                        }
@ -2468,11 +2149,11 @@ class CharsetMBCS extends CharsetICU {
                            charErrorBufferArray[0] = unicodeCodeUnits[offset];
                            charErrorBufferLength = 1;
                            cr[0] = CoderResult.OVERFLOW;
-                            
+
                            offset = 0;
                            break;
                        }
-                    } else if (CharsetDecoderICU.isToUUseFallback() ? (c&0xfffe)==0xe000 : c==0xe000) {
+                    } else if (isFallbackUsed() ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
                        /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
                        target.put(unicodeCodeUnits[offset]);
                        if (offsets != null) {
@ -2481,18 +2162,18 @@ class CharsetMBCS extends CharsetICU {
                        byteIndex = 0;
                    } else if (c == 0xffff) {
                        /* callback(illegal) */
-                        cr[0] = CoderResult.malformedForLength(1);
+                        cr[0] = CoderResult.malformedForLength(byteIndex);
                    }
-                } else if (action == MBCS_STATE_VALID_DIRECT_20 ||
-                        action == MBCS_STATE_FALLBACK_DIRECT_20 && CharsetDecoderICU.isToUUseFallback()) {
+                } else if (action == MBCS_STATE_VALID_DIRECT_20
+                        || (action == MBCS_STATE_FALLBACK_DIRECT_20 && isFallbackUsed())) {
                    entry = MBCS_ENTRY_FINAL_VALUE(entry);
                    /* output surrogate pair */
-                    target.put((char)(0xd800 | (char)(entry&0x3ff)));
+                    target.put((char)(0xd800 | (char)(entry >> 10)));
                    if (offsets != null) {
                        offsets.put(sourceIndex);
                    }
                    byteIndex = 0;
-                    c = (char)(0xdc00 | (char)(entry>>10));
+                    c = (char)(0xdc00 | (char)(entry & 0x3ff));
                    if (target.hasRemaining()) {
                        target.put(c);
                        if (offsets != null) {
@ -2503,30 +2184,27 @@ class CharsetMBCS extends CharsetICU {
                        charErrorBufferArray[0] = c;
                        charErrorBufferLength = 1;
                        cr[0] = CoderResult.OVERFLOW;
-                        
+
                        offset = 0;
                        break;
                    }
-                    
                } else if (action == MBCS_STATE_CHANGE_ONLY) {
                    /*
-                     * This serves as a state change without any output.
-                     * It is useful for reading simple stateful encodings,
-                     * for example using just Shift-In/Shift-Out codes.
-                     * The 21 unused bits may later be used for more sophisticated
-                     * state transistions.
+                     * This serves as a state change without any output. It is useful for reading simple stateful
+                     * encodings, for example using just Shift-In/Shift-Out codes. The 21 unused bits may later be used
+                     * for more sophisticated state transitions.
                     */
                    if (sharedData.mbcs.dbcsOnlyState == 0) {
                        byteIndex = 0;
                    } else {
                        /* SI/SO are illegal for DBCS-only conversion */
-                        state = (short)(UConverterConstants.UNSIGNED_BYTE_MASK&mode); /* restore the previous state */
-                        
+                        state = (byte)(mode); /* restore the previous state */
+
                        /* callback(illegal) */
-                        cr[0] = CoderResult.malformedForLength(1);
+                        cr[0] = CoderResult.malformedForLength(byteIndex);
                    }
                } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
-                    if (CharsetDecoderICU.isToUUseFallback()) {
+                    if (isFallbackUsed()) {
                        /* output BMP code point */
                        target.put((char)MBCS_ENTRY_FINAL_VALUE_16(entry));
                        if (offsets != null) {
@ -2538,37 +2216,70 @@ class CharsetMBCS extends CharsetICU {
                    /* just fall through */
                } else if (action == MBCS_STATE_ILLEGAL) {
                    /* callback(illegal) */
-                    cr[0] = CoderResult.malformedForLength(1);
+                    cr[0] = CoderResult.malformedForLength(byteIndex);
                } else {
                    /* reserved, must never occur */
                    byteIndex = 0;
                }
-                
-                /* end of action codes: prepare for new character */
+
+                /* end of action codes: prepare for a new character */
                offset = 0;
-                
+
                if (byteIndex == 0) {
                    sourceIndex = nextSourceIndex;
                } else if (cr[0].isError()) {
                    /* callback(illegal) */
+                    if (byteIndex > 1) {
+                        /*
+                         * Ticket 5691: consistent illegal sequences:
+                         * - We include at least the first byte in the illegal sequence.
+                         * - If any of the non-initial bytes could be the start of a character,
+                         *   we stop the illegal sequence before the first one of those.
+                         */
+                        boolean isDBCSOnly = (sharedData.mbcs.dbcsOnlyState != 0);
+                        byte i;
+                        for (i = 1; i < byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, (short)(bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK)); i++) {}
+                        if (i < byteIndex) {
+                            byte backOutDistance = (byte)(byteIndex - i);
+                            int bytesFromThisBuffer = sourceArrayIndex - sourceArrayIndexStart;
+                            byteIndex = i; /* length of reported illegal byte sequence */
+                            if (backOutDistance <= bytesFromThisBuffer) {
+                                sourceArrayIndex -= backOutDistance;
+                            } else {
+                                /* Back out bytes from the previous buffer: Need to replay them. */
+                                this.preToULength = (byte)(bytesFromThisBuffer - backOutDistance);
+                                /* preToULength is negative! */
+                                for (int n = 0; n < -this.preToULength; n++) {
+                                    this.preToUArray[n] = bytes[i+n];
+                                }
+                                sourceArrayIndex = sourceArrayIndexStart;
+                            }
+                        }
+                    }
                    break;
-                } else { /* unassigned sequences indicated with byteIndex>0 */
+                } else /* unassigned sequences indicated with byteIndex>0 */{
                    /* try an extension mapping */
+                    int sourceBeginIndex = sourceArrayIndex;
+                    source.position(sourceArrayIndex);
                    byteIndex = toU(byteIndex, source, target, offsets, sourceIndex, flush, cr);
-                    sourceIndex = nextSourceIndex + source.position();
-                    
-                    if (cr[0].isError()) {
+                    sourceArrayIndex = source.position();
+                    sourceIndex = nextSourceIndex += (int)(sourceArrayIndex - sourceBeginIndex);
+
+                    if (cr[0].isError() || cr[0].isOverflow()) {
                        /* not mappable or buffer overflow */
                        break;
                    }
                }
            }
-            
+
            /* set the converter state back into UConverter */
            toUnicodeStatus = offset;
            mode = state;
            toULength = byteIndex;
-            
+
+            /* write back the updated pointers */
+            source.position(sourceArrayIndex);
+
            return cr[0];
        }
        /*
@ -2908,8 +2619,7 @@ class CharsetMBCS extends CharsetICU {
            /* conversion loop */
            while (true) {
                // entry=stateTable[state][(uint8_t)source[i++]];
-                entry = stateTable[state][source.get() & UConverterConstants.UNSIGNED_BYTE_MASK];
-                i = source.position();
+                entry = stateTable[state][source.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK];

                if (MBCS_ENTRY_IS_TRANSITION(entry)) {
                    state = MBCS_ENTRY_TRANSITION_STATE(entry);
@ -2991,8 +2701,8 @@ class CharsetMBCS extends CharsetICU {
                /* try an extension mapping */
                if (sharedData.mbcs.extIndexes != null) {
                    /* Increase the limit for proper handling. Used in LMBCS. */
-                    if (source.limit() >= source.position() + length) {
-                        source.limit(source.position() + length);
+                    if (source.limit() > i + length) {
+                        source.limit(i + length);
                    }
                    return simpleMatchToU(source, useFallback);
                }
@ -3000,6 +2710,51 @@ class CharsetMBCS extends CharsetICU {

            return c;
        }
+        /**
+         * Tells whether, starting from <code>state</code>, some sequence of further bytes can reach a final
+         * entry whose action is not MBCS_STATE_ILLEGAL. The row of <code>state</code> is checked first;
+         * only if it holds no such final entry are the states it transitions to searched recursively.
+         */
+        private boolean hasValidTrailBytes(int[][] stateTable, short state) {
+            final int[] stateRow = stateTable[state];
+            /* Quick probes: look at 0xa1 and then 0x41, two commonly valid byte values, before scanning. */
+            int probe = stateRow[0xa1];
+            boolean found = !MBCS_ENTRY_IS_TRANSITION(probe) && MBCS_ENTRY_FINAL_ACTION(probe) != MBCS_STATE_ILLEGAL;
+            if (!found) {
+                probe = stateRow[0x41];
+                found = !MBCS_ENTRY_IS_TRANSITION(probe) && MBCS_ENTRY_FINAL_ACTION(probe) != MBCS_STATE_ILLEGAL;
+            }
+            /* Full scan of the row: any final entry that is not illegal settles the question. */
+            for (int value = 0; !found && value < 0x100; value++) {
+                int cell = stateRow[value];
+                found = !MBCS_ENTRY_IS_TRANSITION(cell) && MBCS_ENTRY_FINAL_ACTION(cell) != MBCS_STATE_ILLEGAL;
+            }
+            /* Nothing final in this row: descend into each state that this row transitions to. */
+            for (int value = 0; !found && value < 0x100; value++) {
+                int cell = stateRow[value];
+                if (MBCS_ENTRY_IS_TRANSITION(cell)) {
+                    short nextState = (short)(MBCS_ENTRY_TRANSITION_STATE(cell) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                    found = hasValidTrailBytes(stateTable, nextState);
+                }
+            }
+            return found;
+        }
+        
+        /**
+         * Tells whether byte b, read in the given state, hits a non-illegal final entry or is a lead byte with valid trail bytes.
+         */
+        private boolean isSingleOrLead(int[][] stateTable, int state, boolean isDBCSOnly, int b) {
+            final int cell = stateTable[state][b];
+            if (!MBCS_ENTRY_IS_TRANSITION(cell)) {
+                final short finalAction = (short)(MBCS_ENTRY_FINAL_ACTION(cell) & UConverterConstants.UNSIGNED_BYTE_MASK);
+                /* SI/SO (state change only) are illegal for DBCS-only conversion */
+                final boolean illegalShift = isDBCSOnly && finalAction == MBCS_STATE_CHANGE_ONLY;
+                return !illegalShift && finalAction != MBCS_STATE_ILLEGAL;
+            }
+            /* lead byte: it only counts when some trail byte sequence can complete it */
+            return hasValidTrailBytes(stateTable, (short)(MBCS_ENTRY_TRANSITION_STATE(cell) & UConverterConstants.UNSIGNED_BYTE_MASK));
+        }
+        

    }

--- a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
@ -1,6 +1,6 @@
 /**
 *******************************************************************************
-* Copyright (C) 2006-2008, International Business Machines Corporation and    *
+* Copyright (C) 2006-2009, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 *
@ -4809,21 +4809,6 @@ public class TestCharset extends TestFmwk {
        if (!result.isOverflow()) {
            errln("Overflow buffer while decoding ISO-2022-KR should have occurred.");
        }
-        
-        /* This is part of the ambiguous converter test in ICU4C and is used here to provide
-         * better code coverage.
-         */
-        byte [] bytearray2 = {
-                0x61, 0x5b, 0x5c
-        };
-        
-        bb = ByteBuffer.wrap(bytearray2);
-        cb = CharBuffer.allocate(20);
-        
-        result = decoder.decode(bb, cb, true);
-        if (!result.isMalformed()) {
-            errln("Malformed error while decoding ISO-2022-KR should have occurred.");
-        }
    }
    
    //provide better code coverage for Charset ISO-2022-JP
--- a/icu4j/src/com/ibm/icu/dev/test/charset/TestConversion.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestConversion.java
@ -1090,38 +1090,16 @@ public class TestConversion extends ModuleTest {
        output.limit(output.position());
        output.rewind();

-//TODO: Fix Me!  After Ticket#6583 is completed, this code should be removed.
-        boolean ignoreError = (0 <= cc.caseNr && cc.caseNr <= 15) || cc.caseNr == 17 || cc.caseNr == 18;
-//TODO: End
-
        // test to see if the conversion matches actual results
        if (output.limit() != expected.length()) {
-//TODO: Remove this
-            if (ignoreError) {
-                logln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
-            } else {
-                errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
-                res = false;
-            }
-//TODO: End
-//            errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
-//            res = false;
+            errln("Test failed: output length does not match expected for charset: "+cc.charset+ " [" + cc.caseNr + "]");
+            res = false;
        } else {
            for (int i = 0; i < expected.length(); i++) {
                if (output.get(i) != expected.charAt(i)) {
-//TODO: Remove this
-                    if (ignoreError) {
-                        logln("Test failed: output does not match expected for charset: " + cc.charset
-                                + " [" + cc.caseNr + "]");
-                    } else {
-                        errln("Test failed: output does not match expected for charset: " + cc.charset
-                                + " [" + cc.caseNr + "]");
-                        res = false;
-                    }
-//TODO: End
-//                    errln("Test failed: output does not match expected for charset: " + cc.charset
-//                            + " [" + cc.caseNr + "]");
-//                    res = false;
+                    errln("Test failed: output does not match expected for charset: " + cc.charset
+                            + " [" + cc.caseNr + "]");
+                    res = false;
                    break;
                }
            }