ICU-5564 conform to java's spec for UTF-16 converter

X-SVN-Rev: 20917
2007-01-24 21:54:59 +00:00 · 2007-01-24 21:54:59 +00:00 · 31a9f8c37b
commit 31a9f8c37b
parent 98cf7d46ae
12 changed files with 857 additions and 369 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -55,6 +55,8 @@ icu4c/source/test/testdata/importtest.bin -text
 icu4c/source/test/testdata/uni-text.bin -text
 icu4j/ee.foundation.jar -text
 icu4j/license.html -text
+icu4j/src/com/ibm/icu/charset/CharsetUTF16BE.java -text
+icu4j/src/com/ibm/icu/charset/CharsetUTF32BE.java -text
 icu4j/src/com/ibm/icu/dev/data/rbbi/english.dict -text
 icu4j/src/com/ibm/icu/dev/data/testdata.jar -text
 icu4j/src/com/ibm/icu/dev/data/thai6.ucs -text
--- a/icu4j/build.xml
+++ b/icu4j/build.xml
@ -149,6 +149,10 @@
            <srcfiles dir="${build.dir}" includes="${icu4j.data.path}/*.icu"/>
        </uptodate>
        <!-- <echo message="icu4j.module.resources result: ${icu4j.module.resources}" /> -->
+        <tstamp>
+            <format property="date.time" pattern="yyyy-MM-dd 'at' hh:mm:ss z" locale="en,US"/>
+        </tstamp>
+        <echo message="Initialized at ${date.time}"/>
    </target>

    <!-- build everything but dist-related stuff -->
--- a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java
@ -169,7 +169,7 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{
        setSourcePosition(in);
        return ret;
 	}
-    
+ 
    /**
     * Implements the ICU semantic for decode operation
     * @param in The input byte buffer
--- a/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java
@ -29,6 +29,9 @@ import com.ibm.icu.text.UTF16;
 */
 public abstract class CharsetEncoderICU extends CharsetEncoder {

+    static final int NEED_TO_WRITE_BOM = 1;
+    boolean writeBOM = false; /* only used by UTF-16, UTF-32 */
+    
    byte[] errorBuffer = new byte[30];
    int errorBufferLength = 0;
    
--- a/icu4j/src/com/ibm/icu/charset/CharsetICU.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetICU.java
@ -14,6 +14,7 @@ import java.io.InputStreamReader;
 import java.lang.reflect.Constructor;

 import java.lang.reflect.InvocationTargetException;
+import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;
@ -62,7 +63,6 @@ public abstract class CharsetICU extends Charset{
     byte subChar1;               /* +80: 1  single-byte substitution character for IBM MBCS (0 if none) */
     byte reserved[/*19*/];           /* +81: 19 to round out the structure */
     
-     boolean writeBOM = false; /* only used by UTF-16, UTF-32 */
     
    /**
     * 
@ -120,15 +120,15 @@ public abstract class CharsetICU extends Charset{
        algorithmicCharsets.put("US-ASCII",              "com.ibm.icu.charset.CharsetASCII" );
        algorithmicCharsets.put("ISO-8859-1",            "com.ibm.icu.charset.Charset88591" );
        algorithmicCharsets.put("UTF-16",                "com.ibm.icu.charset.CharsetUTF16" );
-        algorithmicCharsets.put("UTF-16BE",              "com.ibm.icu.charset.CharsetUTF16" );
+        algorithmicCharsets.put("UTF-16BE",              "com.ibm.icu.charset.CharsetUTF16BE" );
        algorithmicCharsets.put("UTF-16LE",              "com.ibm.icu.charset.CharsetUTF16LE" );
        algorithmicCharsets.put("UTF16_OppositeEndian",  "com.ibm.icu.charset.CharsetUTF16LE" );
        algorithmicCharsets.put("UTF16_PlatformEndian",  "com.ibm.icu.charset.CharsetUTF16" );
        algorithmicCharsets.put("UTF-32",                "com.ibm.icu.charset.CharsetUTF32" );
-        algorithmicCharsets.put("UTF-32BE",              "com.ibm.icu.charset.CharsetUTF32" );
+        algorithmicCharsets.put("UTF-32BE",              "com.ibm.icu.charset.CharsetUTF32BE" );
        algorithmicCharsets.put("UTF-32LE",              "com.ibm.icu.charset.CharsetUTF32LE" );
-        algorithmicCharsets.put("UTF32_PlatformEndian",  "com.ibm.icu.charset.CharsetUTF32LE" );
-        algorithmicCharsets.put("UTF32_OppositeEndian",  "com.ibm.icu.charset.CharsetUTF32" );
+        algorithmicCharsets.put("UTF32_OppositeEndian",  "com.ibm.icu.charset.CharsetUTF32LE" );
+        algorithmicCharsets.put("UTF32_PlatformEndian",  "com.ibm.icu.charset.CharsetUTF32" );
        algorithmicCharsets.put("UTF-7",                 "com.ibm.icu.charset.CharsetUTF7" );
        algorithmicCharsets.put("UTF-8",                 "com.ibm.icu.charset.CharsetUTF8" );
    }
@ -223,11 +223,106 @@ public abstract class CharsetICU extends Charset{
        CharsetProviderICU icuProvider = new CharsetProviderICU();
        CharsetICU cs = (CharsetICU) icuProvider.charsetForName(charsetName);
        if (cs != null) {
-            cs.writeBOM = true;
            return cs;
        }
        return Charset.forName(charsetName);
    }
    
+    /**
+     * Detects a Unicode signature (byte order mark) at the current position
+     * of a byte buffer. This follows ucnv.c's ucnv_detectUnicodeSignature().
+     *
+     * At most five bytes starting at source.position() are examined; bytes
+     * before the position are never read. When a signature is recognized the
+     * buffer's position is advanced past it, otherwise the buffer is left
+     * untouched.
+     *
+     * @param source the bytes to examine, starting at its current position
+     * @return the name of the charset whose signature was found ("UTF-16BE",
+     *         "UTF-16LE", "UTF-32BE", "UTF-32LE", "UTF-8", "UTF-7", "SCSU",
+     *         "BOCU-1" or "UTF-EBCDIC"), or null when no signature is recognized
+     */
+    static String detectUnicodeSignature(ByteBuffer source) {
+        final int SIG_MAX_LEN = 5; // longest signature checked below (UTF-7)
+        final int base = source.position(); // all reads are relative to this
+
+        /*
+         * initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN bytes we
+         * don't misdetect something (0xa5 occurs in none of the signatures)
+         */
+        byte start[] = { (byte) 0xa5, (byte) 0xa5, (byte) 0xa5, (byte) 0xa5,
+                (byte) 0xa5 };
+
+        int available = Math.min(source.remaining(), SIG_MAX_LEN);
+        for (int i = 0; i < available; i++) {
+            start[i] = source.get(base + i); // absolute get: position unchanged
+        }
+
+        int signatureLength = 0; // number of bytes of the signature
+        String sigUniCharset = null; // states what unicode charset is the BOM
+
+        if (start[0] == (byte) 0xFE && start[1] == (byte) 0xFF) {
+            signatureLength = 2;
+            sigUniCharset = "UTF-16BE";
+        } else if (start[0] == (byte) 0xFF && start[1] == (byte) 0xFE) {
+            /* FF FE 00 00 is the UTF-32LE signature, plain FF FE is UTF-16LE */
+            if (start[2] == (byte) 0x00 && start[3] == (byte) 0x00) {
+                signatureLength = 4;
+                sigUniCharset = "UTF-32LE";
+            } else {
+                signatureLength = 2;
+                sigUniCharset = "UTF-16LE";
+            }
+        } else if (start[0] == (byte) 0xEF && start[1] == (byte) 0xBB
+                && start[2] == (byte) 0xBF) {
+            signatureLength = 3;
+            sigUniCharset = "UTF-8";
+        } else if (start[0] == (byte) 0x00 && start[1] == (byte) 0x00
+                && start[2] == (byte) 0xFE && start[3] == (byte) 0xFF) {
+            signatureLength = 4;
+            sigUniCharset = "UTF-32BE";
+        } else if (start[0] == (byte) 0x0E && start[1] == (byte) 0xFE
+                && start[2] == (byte) 0xFF) {
+            signatureLength = 3;
+            sigUniCharset = "SCSU";
+        } else if (start[0] == (byte) 0xFB && start[1] == (byte) 0xEE
+                && start[2] == (byte) 0x28) {
+            signatureLength = 3;
+            sigUniCharset = "BOCU-1";
+        } else if (start[0] == (byte) 0x2B && start[1] == (byte) 0x2F
+                && start[2] == (byte) 0x76) {
+            /* "+/v" followed by one of 8 9 + /, optionally "8-" (5 bytes) */
+            if (start[3] == (byte) 0x38 && start[4] == (byte) 0x2D) {
+                signatureLength = 5;
+                sigUniCharset = "UTF-7";
+            } else if (start[3] == (byte) 0x38 || start[3] == (byte) 0x39
+                    || start[3] == (byte) 0x2B || start[3] == (byte) 0x2F) {
+                signatureLength = 4;
+                sigUniCharset = "UTF-7";
+            }
+        } else if (start[0] == (byte) 0xDD && start[1] == (byte) 0x73
+                && start[2] == (byte) 0x66 && start[3] == (byte) 0x73) {
+            /* the second byte is start[1]; testing start[2] twice could never match */
+            signatureLength = 4;
+            sigUniCharset = "UTF-EBCDIC";
+        }
+
+        if (sigUniCharset != null) {
+            /* consume the signature; a match implies these bytes were available */
+            source.position(base + signatureLength);
+        }
+
+        /* null: no known Unicode signature byte sequence recognized */
+        return sigUniCharset;
+    }
+
 }

--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF16.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF16.java
@ -20,7 +20,7 @@ import com.ibm.icu.text.UTF16;
 class CharsetUTF16 extends CharsetICU {
    
    protected byte[] fromUSubstitution = new byte[]{(byte)0xff, (byte)0xfd};
-   
+ 
    public CharsetUTF16(String icuCanonicalName, String javaCanonicalName, String[] aliases){
        super(icuCanonicalName, javaCanonicalName, aliases);
        maxBytesPerChar = 4;
@ -28,17 +28,152 @@ class CharsetUTF16 extends CharsetICU {
        maxCharsPerByte = 1;
    }
    class CharsetDecoderUTF16 extends CharsetDecoderICU{
-
+        ByteBuffer utf16BOM = ByteBuffer.wrap(new byte[]{ (byte)0xfe, (byte)0xff, 0, 0,    (byte)0xff, (byte)0xfe, 0, 0 });        
        public CharsetDecoderUTF16(CharsetICU cs) {
            super(cs);
        }
-
        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            /*
+             * Decodes UTF-16 with byte order detection. The detection state is kept in
+             * "mode" across calls:
+             *   0    - nothing seen yet
+             *   1    - saw FE, waiting for FF (big-endian BOM)
+             *   5    - saw FF, waiting for FE (little-endian BOM)
+             *   8, 9 - byte order settled: UTF-16BE / UTF-16LE
+             * Without a BOM the input is treated as UTF-16BE. Returns the CoderResult of
+             * the last decoding step; source is left positioned after the consumed bytes.
+             */
+            int state, offsetDelta;
+            byte b;
            CoderResult cr = CoderResult.UNDERFLOW;
+            /* remember where this call starts writing offsets so they can be adjusted below */
+            int offsetsPos = (offsets==null)?0:offsets.position();
+            utf16BOM.limit(utf16BOM.capacity());
+            /*
+             * If we detect a BOM in this buffer, then we must add the BOM size to the
+             * offsets because the actual converter function will not see and count the BOM.
+             * offsetDelta will have the number of the BOM bytes that are in the current buffer.
+             */
+            offsetDelta=0;
+            state=mode;
+            int pos = source.position();
+            while(pos < source.limit()) {
+                switch(state) {
+                case 0:
+                    b=source.get(pos);
+                    if(b==(byte)0xfe) {
+                        state=1; /* could be FE FF */
+                    } else if(b==(byte)0xff) {
+                        state=5; /* could be FF FE */
+                    } else {
+                        state=8; /* default to UTF-16BE */
+                        continue;
+                    }
+                    pos++;
+                    break;
+                case 1:
+                case 5:
+                    /* utf16BOM[1]==FF and utf16BOM[5]==FE are the expected second BOM bytes */
+                    if(source.get(pos)==utf16BOM.get(state)) {
+                        ++pos;
+                        if(state==1) {
+                            state=8; /* detect UTF-16BE */
+                            offsetDelta=pos-source.position();
+                        } else if(state==5) {
+                            state=9; /* detect UTF-16LE */
+                            offsetDelta=pos-source.position();
+                        }
+                    } else {
+                        /* switch to UTF-16BE and pass the previous bytes */
+                        if(pos!=source.position()) {
+                            /* just reset the source */
+                            pos=source.position();
+                        } else {
+                            /* the first BOM candidate byte came from an earlier buffer: replay it */
+                            boolean oldFlush=flush;
+                            int bomIndex = state&4;
+                            ByteBuffer oldSource = source;
+                            source = utf16BOM;
+                            utf16BOM.position(bomIndex);/* select the correct BOM */
+                            source.limit(bomIndex+1);/* replay previous byte */
+                            flush = false; /* this sourceLimit is not the real source stream limit */
+                            cr = decodeLoopUTF16BE(source, target, offsets, flush);
+                            /* restore the real source; its position will be set in case 8/9 */
+                            flush = oldFlush;
+                            source = oldSource;
+                        }
+                        state=8;
+                        continue;
+                    }
+                    break;
+                case 8:
+                case 9:
+                    mode = state;
+                    source.position(pos);
+                    cr = decodeLoopImpl(source, target, offsets, flush);
+                    pos = source.position();
+                    break;
+                default:
+                    break; /* does not occur */
+                }
+                if(cr.isOverflow() || cr.isError()){
+                    break;
+                }
+            }
+            
+
+            /* add BOM size to offsets - see comment at offsetDelta declaration */
+            if(offsets!=null && offsetDelta!=0) {
+                int offsetsLimit=offsets.position();
+                /*
+                 * walk the offsets written during this call; the index must be offsetsPos
+                 * (not the source index pos), otherwise the loop never advances and pos
+                 * is corrupted before it is used for source.position() below
+                 */
+                while(offsetsPos<offsetsLimit) {
+                    int delta = offsetDelta + offsets.get(offsetsPos);
+                    offsets.put(offsetsPos++, delta);
+                }
+            }
+
+            source.position(pos);
+            
+            if(!source.hasRemaining() && flush) {
+                /* handle truncated input */
+                switch(state) {
+                case 0:
+                    break; /* no input at all, nothing to do */
+                case 8:
+                    cr = decodeLoopUTF16BE(source, target, offsets, flush);
+                    break;
+                case 9:
+                    cr = decodeLoopUTF16LE(source, target, offsets, flush);
+                    break;
+                default:
+                    /* handle 0<state<8: call UTF-16BE with too-short input */
+                    boolean oldFlush=flush;
+                    int bomIndex = state&4;
+                    ByteBuffer oldSource = source;
+                    source = utf16BOM;
+                    utf16BOM.position(bomIndex);/* select the correct BOM */
+                    source.limit(bomIndex+1);/* replay previous byte */
+                    flush = false; /* this sourceLimit is not the real source stream limit */
+                    cr = decodeLoopUTF16BE(source, target, offsets, flush);
+                    /* restore the real source and flush flag */
+                    flush = oldFlush;
+                    source = oldSource;
+                    state=8;
+                    break;
+                }
+            }
+
+            mode=state;
+            return cr;
+        }
+        protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            /* dispatch on the byte order recorded in mode by decodeLoop(): 8 = BE, 9 = LE */
+            switch(mode) {
+            case 8:
+                /* call UTF-16BE */
+                return decodeLoopUTF16BE(source, target, offsets, flush);
+            case 9:
+                /* call UTF-16LE */
+                return decodeLoopUTF16LE(source, target, offsets, flush);
+            default:
+                /* should not occur */
+                throw new InternalError("Unknown State in UTF-16 converter!");
+            }
+        }
+        final CoderResult decodeLoopUTF16BE(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            CoderResult cr = CoderResult.UNDERFLOW;
+
            if(!source.hasRemaining() && toUnicodeStatus==0) {
                /* no input, nothing to do */
                return cr;
-            }
+            }            
            if(!target.hasRemaining()) {
                return CoderResult.OVERFLOW;
            }
@ -241,7 +376,219 @@ class CharsetUTF16 extends CharsetICU {

            return cr;
        }
+        /**
+         * Decodes little-endian UTF-16 bytes (low byte first) from source into target.
+         *
+         * Bytes left over from an earlier call (toUBytesArray/toULength, or a single byte
+         * parked in toUnicodeStatus) are completed first, then whole code units are copied.
+         * A trail surrogate that does not fit into target is stored in charErrorBufferArray.
+         * An incomplete trailing code unit is saved in toUBytesArray for the next call.
+         *
+         * @param source  input bytes; its position is advanced past the consumed bytes
+         * @param target  output chars
+         * @param offsets if not null, receives one source offset per output char
+         *                (-1 for chars completed from bytes of a previous buffer)
+         * @param flush   not read by this method
+         * @return UNDERFLOW, OVERFLOW, or a malformed-input result for unmatched surrogates
+         */
+        final CoderResult decodeLoopUTF16LE(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            if(!source.hasRemaining() && toUnicodeStatus==0) {
+                /* no input, nothing to do */
+                return cr;
+            }
+            if(!target.hasRemaining()) {
+                return CoderResult.OVERFLOW;
+            }
        
+            int sourceIndex=0, count=0, length, sourceArrayIndex;
+            char c=0, trail;
+            length = source.remaining();
+            sourceArrayIndex = source.position();
+
+            /* complete a partial UChar or pair from the last call */
+            if(toUnicodeStatus!=0) {
+                /*
+                 * special case: single byte from a previous buffer,
+                 * where the byte turned out not to belong to a trail surrogate
+                 * and the preceding, unmatched lead surrogate was put into toUBytes[]
+                 * for error handling
+                 */
+                toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
+                toULength=1;
+                toUnicodeStatus=0;
+            }
+            if((count=toULength)!=0) {
+                byte[] pArray=toUBytesArray;
+                int pArrayIndex = toUBytesBegin;
+                do {
+                    /* NOTE(review): this write indexes by count alone while the reads below add pArrayIndex; equivalent only if toUBytesBegin is 0 - confirm */
+                    pArray[count++]=source.get(sourceArrayIndex++);
+                    ++sourceIndex;
+                    --length;
+                    if(count==2) {
+                        c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                        if(!UTF16.isSurrogate(c)) {
+                            /* output the BMP code point */
+                            target.put(c);
+                            if(offsets!=null) {
+                                offsets.put(-1);
+                            }
+                            count=0;
+                            c=0;
+                            break;
+                        } else if(UTF16.isLeadSurrogate(c)) {
+                            /* continue collecting bytes for the trail surrogate */
+                            c=0; /* avoid unnecessary surrogate handling below */
+                        } else {
+                            /* fall through to error handling for an unmatched trail surrogate */
+                            break;
+                        }
+                    } else if(count==4) {
+                        c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                        trail=(char)(((pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK));
+                        if(UTF16.isTrailSurrogate(trail)) {
+                            /* output the surrogate pair */
+                            target.put(c);
+                            if(target.remaining()>=1) {
+                                target.put(trail);
+                                if(offsets!=null) {
+                                    offsets.put(-1);
+                                    offsets.put(-1);
+                                }
+                            } else /* targetCapacity==1 */ {
+                                charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                                charErrorBufferLength=1;
+                            return CoderResult.OVERFLOW;
+                            }
+                            count=0;
+                            c=0;
+                            break;
+                        } else {
+                            /* unmatched lead surrogate, handle here for consistent toUBytes[] */
+        
+                            /* back out reading the code unit after it */
+                            /* NOTE(review): sourceArrayIndex starts at source.position() and only grows, so this difference is never positive and the branch looks unreachable; the operands may be swapped - confirm */
+                            if((source.position()-sourceArrayIndex)>=2) {
+                                sourceArrayIndex-=2;
+                            } else {
+                                /*
+                                 * if the trail unit's first byte was in a previous buffer, then
+                                 * we need to put it into a special place because toUBytes[] will be
+                                 * used for the lead unit's bytes
+                                 */
+                                toUnicodeStatus=0x100|pArray[pArrayIndex+2];
+                                --sourceArrayIndex;
+                            }
+                            toULength=2;
+                            /* NOTE(review): malformedForLength() takes a length, but a buffer index is passed here and at the other call sites below - confirm intent */
+                            cr = CoderResult.malformedForLength(sourceArrayIndex);
+                            break;
+                        }
+                    }
+                } while(length>0);
+                toULength=(byte)count;
+            }
+        
+            /* copy an even number of bytes for complete UChars */
+            count=2*target.remaining();
+            if(count>length) {
+                count=length&~1;
+            }
+            if(c==0 && count>0) {
+                length-=count;
+                count>>=1;
+                //targetCapacity-=count;
+                if(offsets==null) {
+                    do {
+                        c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                        sourceArrayIndex+=2;
+                        if(!UTF16.isSurrogate(c)) {
+                            target.put(c);
+                        } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                                  UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                                 ) {
+                            sourceArrayIndex+=2;
+                            --count;
+                            target.put(c);
+                            target.put(trail);
+                        } else {
+                            break;
+                        }
+                    } while(--count>0);
+                } else {
+                    /* same loop as above, additionally recording the source offset of every output char */
+                    do {
+                        c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
+                        sourceArrayIndex+=2;
+                        if(!UTF16.isSurrogate(c)) {
+                            target.put(c);
+                            offsets.put(sourceIndex);
+                            sourceIndex+=2;
+                        } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
+                                  UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
+                        ) {
+                            sourceArrayIndex+=2;
+                            --count;
+                            target.put(c);
+                            target.put(trail);
+                            offsets.put(sourceIndex);
+                            offsets.put(sourceIndex);
+                            sourceIndex+=4;
+                        } else {
+                            break;
+                        }
+                    } while(--count>0);
+                }
+        
+                if(count==0) {
+                    /* done with the loop for complete UChars */
+                    c=0;
+                } else {
+                    /* keep c for surrogate handling, trail will be set there */
+                    length+=2*(count-1); /* one more byte pair was consumed than count decremented */
+                }
+            }
+        
+            if(c!=0) {
+                /*
+                 * c is a surrogate, and
+                 * - source or target too short
+                 * - or the surrogate is unmatched
+                 */       
+ 
+                /* save the code unit's bytes in little-endian order */
+                toUBytesArray[toUBytesBegin+0]=(byte)c;
+                toUBytesArray[toUBytesBegin+1]=(byte)(c>>>8);    
+                toULength=2;
+        
+                if(UTF16.isLeadSurrogate(c)) {
+                    if(length>=2) {
+                        if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
+                            /* output the surrogate pair, will overflow (see conditions comment above) */
+                            sourceArrayIndex+=2;
+                            length-=2;
+                            target.put(c);
+                            if(offsets!=null) {
+                                offsets.put(sourceIndex);
+                            }
+                            charErrorBufferArray[charErrorBufferBegin+0]=trail;
+                            charErrorBufferLength=1;
+                            toULength=0;
+                            cr = CoderResult.OVERFLOW;
+                        } else {
+                            /* unmatched lead surrogate */
+                            cr = CoderResult.malformedForLength(sourceArrayIndex);
+                        }
+                    } else {
+                        /* see if the trail surrogate is in the next buffer */
+                    }
+                } else {
+                    /* unmatched trail surrogate */
+                    cr = CoderResult.malformedForLength(sourceArrayIndex);
+                }
+            }
+        
+           
+            /* check for a remaining source byte */
+            if(!cr.isError()){
+                if(length>0) {
+                    if(!target.hasRemaining()) {
+                        cr = CoderResult.OVERFLOW;
+                    } else {
+                        /* it must be length==1 because otherwise the above would have copied more */
+                        toUBytesArray[toULength++]=source.get(sourceArrayIndex++);
+                    }
+                }
+            }
+            /* report the consumed input back to the caller */
+            source.position(sourceArrayIndex);
+
+            return cr;
+        }
+
+        /*
+         * Resets this decoder by delegating to the superclass.
+         * NOTE(review): the BOM detection state "mode" is not cleared here; presumably
+         * the superclass resets it - confirm.
+         */
+        protected void implReset() {
+            super.implReset();
+        }
    }
    class CharsetEncoderUTF16 extends CharsetEncoderICU{

@ -250,13 +597,11 @@ class CharsetUTF16 extends CharsetICU {
            implReset();
        }

-        private final static int NEED_TO_WRITE_BOM = 1;
-        
        protected void implReset() {
+            /* after the superclass reset, flag that a BOM still has to be written */
            super.implReset();
            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+            writeBOM = true;
        }
-        
        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
            CoderResult cr = CoderResult.UNDERFLOW;
            if(!source.hasRemaining()) {
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF16BE.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF16BE.java
@ -0,0 +1,50 @@
+/**
+*******************************************************************************
+* Copyright (C) 2007, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-16BE charset: reuses the CharsetUTF16 implementation, but its decoder
+ * always decodes big-endian and its encoder is reset to not write a BOM.
+ */
+class CharsetUTF16BE extends CharsetUTF16 {
+
+    public CharsetUTF16BE(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+    }
+
+    /** Creates a decoder that always uses the big-endian decoding loop. */
+    public CharsetDecoder newDecoder() {
+        return new CharsetDecoderUTF16BE(this);
+    }
+
+    /** Creates an encoder whose reset state requests no BOM. */
+    public CharsetEncoder newEncoder() {
+        return new CharsetEncoderUTF16BE(this);
+    }
+
+    /** Decoder whose decodeLoopImpl goes straight to the UTF-16BE loop. */
+    class CharsetDecoderUTF16BE extends CharsetDecoderUTF16{
+
+        public CharsetDecoderUTF16BE(CharsetICU cs) {
+            super(cs);
+        }
+
+        /* no byte order dispatch needed: always big-endian */
+        protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            return decodeLoopUTF16BE(source, target, offsets, flush);
+        }
+    }
+
+    /** Encoder that overrides the reset state set up by CharsetEncoderUTF16. */
+    class CharsetEncoderUTF16BE extends CharsetEncoderUTF16{
+
+        public CharsetEncoderUTF16BE(CharsetICU cs) {
+            super(cs);
+            implReset();
+        }
+
+        /* undo the BOM request made by the superclass reset */
+        protected void implReset() {
+            super.implReset();
+            writeBOM = false;
+            fromUnicodeStatus = 0;
+        }
+    }
+}
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF16LE.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF16LE.java
@ -20,245 +20,33 @@ import com.ibm.icu.text.UTF16;
 /**
 * @author Niti Hantaweepant
 */
-class CharsetUTF16LE extends CharsetICU {
-    
-    protected byte[] fromUSubstitution = new byte[]{(byte)0xfd, (byte)0xff};
+class CharsetUTF16LE extends CharsetUTF16 {
    
    public CharsetUTF16LE(String icuCanonicalName, String javaCanonicalName, String[] aliases){
        super(icuCanonicalName, javaCanonicalName, aliases);
-        maxBytesPerChar = 4;
-        minBytesPerChar = 2;
-        maxCharsPerByte = 1;
+        fromUSubstitution = new byte[]{(byte)0xfd, (byte)0xff};
    }
-    class CharsetDecoderUTF16LE extends CharsetDecoderICU{

+    class CharsetDecoderUTF16LE extends CharsetDecoderUTF16{
+        
        public CharsetDecoderUTF16LE(CharsetICU cs) {
            super(cs);
        }
-
-        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
-            CoderResult cr = CoderResult.UNDERFLOW;
-            if(!source.hasRemaining() && toUnicodeStatus==0) {
-                /* no input, nothing to do */
-                return cr;
-            }
-            if(!target.hasRemaining()) {
-                return CoderResult.OVERFLOW;
-            }
-        
-            int sourceIndex=0, count=0, length, sourceArrayIndex;
-            char c=0, trail;
-            length = source.remaining();
-            sourceArrayIndex = source.position();
-
-            /* complete a partial UChar or pair from the last call */
-            if(toUnicodeStatus!=0) {
-                /*
-                 * special case: single byte from a previous buffer,
-                 * where the byte turned out not to belong to a trail surrogate
-                 * and the preceding, unmatched lead surrogate was put into toUBytes[]
-                 * for error handling
-                 */
-                toUBytesArray[toUBytesBegin+0]=(byte)toUnicodeStatus;
-                toULength=1;
-                toUnicodeStatus=0;
-            }
-            if((count=toULength)!=0) {
-                byte[] pArray=toUBytesArray;
-                int pArrayIndex = toUBytesBegin;
-                do {
-                    pArray[count++]=source.get(sourceArrayIndex++);
-                    ++sourceIndex;
-                    --length;
-                    if(count==2) {
-                        c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
-                        if(!UTF16.isSurrogate(c)) {
-                            /* output the BMP code point */
-                            target.put(c);
-                            if(offsets!=null) {
-                                offsets.put(-1);
-                            }
-                            count=0;
-                            c=0;
-                            break;
-                        } else if(UTF16.isLeadSurrogate(c)) {
-                            /* continue collecting bytes for the trail surrogate */
-                            c=0; /* avoid unnecessary surrogate handling below */
-                        } else {
-                            /* fall through to error handling for an unmatched trail surrogate */
-                            break;
-                        }
-                    } else if(count==4) {
-                        c=(char)(((pArray[pArrayIndex+1]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+0]&UConverterConstants.UNSIGNED_BYTE_MASK));
-                        trail=(char)(((pArray[pArrayIndex+3]&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(pArray[pArrayIndex+2]&UConverterConstants.UNSIGNED_BYTE_MASK));
-                        if(UTF16.isTrailSurrogate(trail)) {
-                            /* output the surrogate pair */
-                            target.put(c);
-                            if(target.remaining()>=1) {
-                                target.put(trail);
-                                if(offsets!=null) {
-                                    offsets.put(-1);
-                                    offsets.put(-1);
-                                }
-                            } else /* targetCapacity==1 */ {
-                                charErrorBufferArray[charErrorBufferBegin+0]=trail;
-                                charErrorBufferLength=1;
-                            return CoderResult.OVERFLOW;
-                            }
-                            count=0;
-                            c=0;
-                            break;
-                        } else {
-                            /* unmatched lead surrogate, handle here for consistent toUBytes[] */
-        
-                            /* back out reading the code unit after it */
-                            if((source.position()-sourceArrayIndex)>=2) {
-                                sourceArrayIndex-=2;
-                            } else {
-                                /*
-                                 * if the trail unit's first byte was in a previous buffer, then
-                                 * we need to put it into a special place because toUBytes[] will be
-                                 * used for the lead unit's bytes
-                                 */
-                                toUnicodeStatus=0x100|pArray[pArrayIndex+2];
-                                --sourceArrayIndex;
-                            }
-                            toULength=2;
-                            cr = CoderResult.malformedForLength(sourceArrayIndex);
-                            break;
-                        }
-                    }
-                } while(length>0);
-                toULength=(byte)count;
-            }
-        
-            /* copy an even number of bytes for complete UChars */
-            count=2*target.remaining();
-            if(count>length) {
-                count=length&~1;
-            }
-            if(c==0 && count>0) {
-                length-=count;
-                count>>=1;
-                //targetCapacity-=count;
-                if(offsets==null) {
-                    do {
-                        c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
-                        sourceArrayIndex+=2;
-                        if(!UTF16.isSurrogate(c)) {
-                            target.put(c);
-                        } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
-                                  UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
-                                 ) {
-                            sourceArrayIndex+=2;
-                            --count;
-                            target.put(c);
-                            target.put(trail);
-                        } else {
-                            break;
-                        }
-                    } while(--count>0);
-                } else {
-                    do {
-                        c=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK));
-                        sourceArrayIndex+=2;
-                        if(!UTF16.isSurrogate(c)) {
-                            target.put(c);
-                            offsets.put(sourceIndex);
-                            sourceIndex+=2;
-                        } else if(UTF16.isLeadSurrogate(c) && count>=2 &&
-                                  UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))
-                        ) {
-                            sourceArrayIndex+=2;
-                            --count;
-                            target.put(c);
-                            target.put(trail);
-                            offsets.put(sourceIndex);
-                            offsets.put(sourceIndex);
-                            sourceIndex+=4;
-                        } else {
-                            break;
-                        }
-                    } while(--count>0);
-                }
-        
-                if(count==0) {
-                    /* done with the loop for complete UChars */
-                    c=0;
-                } else {
-                    /* keep c for surrogate handling, trail will be set there */
-                    length+=2*(count-1); /* one more byte pair was consumed than count decremented */
-                }
-            }
-        
-            if(c!=0) {
-                /*
-                 * c is a surrogate, and
-                 * - source or target too short
-                 * - or the surrogate is unmatched
-                 */		  
- 
-                toUBytesArray[toUBytesBegin+0]=(byte)c;
-                toUBytesArray[toUBytesBegin+1]=(byte)(c>>>8);	 
-                toULength=2;
-        
-                if(UTF16.isLeadSurrogate(c)) {
-                    if(length>=2) {
-                        if(UTF16.isTrailSurrogate(trail=(char)(((source.get(sourceArrayIndex+1)&UConverterConstants.UNSIGNED_BYTE_MASK)<<8)|(source.get(sourceArrayIndex+0)&UConverterConstants.UNSIGNED_BYTE_MASK)))) {
-                            /* output the surrogate pair, will overflow (see conditions comment above) */
-                            sourceArrayIndex+=2;
-                            length-=2;
-                            target.put(c);
-                            if(offsets!=null) {
-                                offsets.put(sourceIndex);
-                            }
-                            charErrorBufferArray[charErrorBufferBegin+0]=trail;
-                            charErrorBufferLength=1;
-                            toULength=0;
-                            cr = CoderResult.OVERFLOW;
-                        } else {
-                            /* unmatched lead surrogate */
-                            cr = CoderResult.malformedForLength(sourceArrayIndex);
-                        }
-                    } else {
-                        /* see if the trail surrogate is in the next buffer */
-                    }
-                } else {
-                    /* unmatched trail surrogate */
-                    cr = CoderResult.malformedForLength(sourceArrayIndex);
-                }
-            }
-        
-           
-            /* check for a remaining source byte */
-            if(!cr.isError()){
-                if(length>0) {
-                    if(!target.hasRemaining()) {
-                        cr = CoderResult.OVERFLOW;
-                    } else {
-                        /* it must be length==1 because otherwise the above would have copied more */
-                        toUBytesArray[toULength++]=source.get(sourceArrayIndex++);
-                    }
-                }
-            }
-            source.position(sourceArrayIndex);
-
-            return cr;
+        protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            return decodeLoopUTF16LE(source, target, offsets, flush);
        }
-        
    }
+    
    class CharsetEncoderUTF16LE extends CharsetEncoderICU{
-
+        
        public CharsetEncoderUTF16LE(CharsetICU cs) {
            super(cs, fromUSubstitution);
            implReset();
        }
-
-        private final static int NEED_TO_WRITE_BOM = 1;
        
        protected void implReset() {
            super.implReset();
-            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+            fromUnicodeStatus = 0;
        }
        
        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF32.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF32.java
@ -29,13 +29,86 @@ class CharsetUTF32 extends CharsetICU {
        minBytesPerChar = 4;
        maxCharsPerByte = 1;
    }
+    
+    
    class CharsetDecoderUTF32 extends CharsetDecoderICU{
-
+        boolean isFirstBuffer;
+        final int SIGNATURE_LENGTH=4;
        public CharsetDecoderUTF32(CharsetICU cs) {
            super(cs);
+            isFirstBuffer = true;
        }
-
+        
        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            /*
+             * Collects the first SIGNATURE_LENGTH bytes of the stream (possibly over several calls)
+             * and checks them for a UTF-32 BOM. A BOM is swallowed and selects mode 1 (BE) or 2 (LE);
+             * any other bytes select mode 3 and are replayed through decodeLoopImpl as ordinary data.
+             * NOTE(review): subclasses that only override decodeLoopImpl inherit this sniffing - confirm.
+             */
+            CoderResult cr = CoderResult.UNDERFLOW;
+            int offsetsPos = (offsets==null)?0:offsets.position();
+            int offsetDelta = 0; /* number of BOM bytes taken from this buffer, see the offsets fix-up below */
+            int start = source.position(), pos = start;
+            if(isFirstBuffer){
+                while(pos < source.limit() && toULength < SIGNATURE_LENGTH) {
+                    toUBytesArray[toULength++] = source.get(pos++);
+                }
+                source.position(pos);
+                if(toULength < SIGNATURE_LENGTH){
+                    return cr; /* signature still incomplete: keep the bytes and wait for more input */
+                }
+                /* cast the constants: a (signed) byte never compares equal to the int 0xFE or 0xFF */
+                if(toUBytesArray[0]==0x00 && toUBytesArray[1]==0x00 && toUBytesArray[2]==(byte)0xFE && toUBytesArray[3]==(byte)0xFF){
+                    mode = 1; /* big-endian BOM */
+                    offsetDelta = pos-start;
+                    toULength = 0;
+                }else if(toUBytesArray[0]==(byte)0xFF && toUBytesArray[1]==(byte)0xFE && toUBytesArray[2]==0x00 && toUBytesArray[3]==0x00){
+                    mode = 2; /* little-endian BOM */
+                    offsetDelta = pos-start;
+                    toULength = 0;
+                }else if(!target.hasRemaining()){
+                    return CoderResult.OVERFLOW; /* no room to replay: stay in first-buffer state and retry */
+                }else{
+                    mode = 3; /* no BOM: default to the subclass charset */
+                    byte[] first = new byte[SIGNATURE_LENGTH];
+                    System.arraycopy(toUBytesArray, 0, first, 0, SIGNATURE_LENGTH);
+                    toULength = 0;
+                    cr = decodeLoopImpl(ByteBuffer.wrap(first), target, null, false);
+                }
+                isFirstBuffer = false;
+            }
+            /* mode is already final here, so a decodeLoopImpl that dispatches on it sees the detected value */
+            if(cr.isUnderflow() && source.hasRemaining()){
+                cr = decodeLoopImpl(source, target, offsets, flush);
+            }
+            /* add BOM size to the offsets written by decodeLoopImpl, which never saw the BOM bytes */
+            if(offsets!=null && offsetDelta!=0) {
+                for(int offsetsLimit=offsets.position(); offsetsPos<offsetsLimit; offsetsPos++) {
+                    offsets.put(offsetsPos, offsetDelta + offsets.get(offsetsPos));
+                }
+            }
+            return cr;
+        }
+        protected int getChar(byte[] bytes, int length){ /* base version: ignores both arguments */
+            return -1; /* CharsetDecoderUTF32BE/LE override this to assemble an int from bytes[0..length) */
+        }
+        protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            /*
+             * Chooses the conversion loop from mode: 2 runs the little-endian loop,
+             * while 1 and every other value run the big-endian loop.
+             * Subclasses replace this dispatch with a fixed byte order.
+             */
+            switch(mode){
+            case 2:
+                /* call UTF-32LE */
+                return decodeLoopUTF32LE(source, target, offsets, flush);
+            case 1: /* call UTF-32BE */
+            default: /* should not occur */
+                return decodeLoopUTF32BE(source, target, offsets, flush);
+            }
+        }
+        final CoderResult decodeLoopUTF32BE(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
            CoderResult cr = CoderResult.UNDERFLOW;
            
            int sourceArrayIndex = source.position();
@ -151,6 +224,127 @@ class CharsetUTF32 extends CharsetICU {
            source.position(sourceArrayIndex);
            return cr;
        }        
+
+        final CoderResult decodeLoopUTF32LE(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            CoderResult cr = CoderResult.UNDERFLOW;
+            /* Decodes little-endian UTF-32: each group of 4 bytes is assembled low byte first into ch. */
+            int sourceArrayIndex = source.position();
+            int ch, i;
+
+            donefornow:
+            {                    
+                /* finish a code point whose first bytes were stored by a previous call (toUnicodeStatus holds ch+1) */
+                if (toUnicodeStatus != 0 && target.hasRemaining()) {
+                    i = toULength;       /* restore # of bytes consumed */
+            
+                    ch = (int)(toUnicodeStatus - 1);/*Stores the previously calculated ch from a previous call*/
+                    toUnicodeStatus = 0;
+                    toULength=0;
+                    
+                    while (i < 4) {
+                        if (sourceArrayIndex < source.limit()) {
+                            ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
+                            toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
+                        }
+                        else {
+                            /* stores a partially calculated target*/
+                            /* + 1 to make 0 a valid character */
+                            toUnicodeStatus = ch + 1;
+                            toULength = (byte) i;
+                            break donefornow;
+                        }
+                    }
+            
+                    if (ch <= UConverterConstants.MAXIMUM_UTF && !isSurrogate(ch)) {
+                        /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
+                        if (ch <= UConverterConstants.MAXIMUM_UCS2) 
+                        {
+                            /* fits in 16 bits */
+                            target.put((char)ch);
+                        }
+                        else {
+                            /* write out the surrogates */
+                            target.put(UTF16.getLeadSurrogate(ch));
+                            ch = UTF16.getTrailSurrogate(ch);
+                            if (target.hasRemaining()) {
+                                target.put((char)ch);
+                            }
+                            else {
+                                /* Put in overflow buffer (not handled here) */
+                                charErrorBufferArray[0] = (char) ch;
+                                charErrorBufferLength = 1;
+                                cr = CoderResult.OVERFLOW;
+                            }
+                        }
+                    }
+                    else {
+                        toULength = (byte)i;
+                        cr = CoderResult.malformedForLength(sourceArrayIndex); /* NOTE(review): passes a buffer index where a length is expected - confirm */
+                        break donefornow;
+                    }
+                }
+                
+                while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
+                    i = 0;
+                    ch = 0;
+            
+                    while (i < 4) {
+                        if (sourceArrayIndex < source.limit()) {
+                            ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
+                            toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
+                        }
+                        else {
+                            /* stores a partially calculated target*/
+                            /* + 1 to make 0 a valid character */
+                            toUnicodeStatus = ch + 1;
+                            toULength = (byte) i;
+                            break donefornow;
+                        }
+                    }
+            
+                    if (ch <= UConverterSharedData.MAXIMUM_UTF && !isSurrogate(ch)) {
+                        /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
+                        if (ch <= UConverterSharedData.MAXIMUM_UCS2) 
+                        {
+                            /* fits in 16 bits */
+                            target.put((char) ch);
+                        }
+                        else {
+                            /* write out the surrogates */
+                            target.put(UTF16.getLeadSurrogate(ch));
+                            ch = UTF16.getTrailSurrogate(ch);
+                            if (target.hasRemaining()) {
+                                target.put((char)ch);
+                            }
+                            else {
+                                /* Put in overflow buffer (not handled here) */
+                                charErrorBufferArray[0] = (char) ch;
+                                charErrorBufferLength = 1;
+                                cr = CoderResult.OVERFLOW;                                    
+                                break;
+                            }
+                        }
+                    }
+                    else {
+                        toULength = (byte)i;
+                        cr = CoderResult.malformedForLength(sourceArrayIndex); /* NOTE(review): passes a buffer index where a length is expected - confirm */
+                        break;
+                    }
+                }
+            }
+            
+            if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
+                /* End of target buffer */
+                cr = CoderResult.OVERFLOW;
+            }                    
+            
+            source.position(sourceArrayIndex);
+            return cr;
+        }  
+        protected void implReset() {
+            super.implReset();
+            isFirstBuffer = true; /* the next decodeLoop call inspects the start of the input for a BOM again */
+        }
    }
    
    class CharsetEncoderUTF32 extends CharsetEncoderICU{
@ -158,10 +352,9 @@ class CharsetUTF32 extends CharsetICU {
        public CharsetEncoderUTF32(CharsetICU cs) {
            super(cs, fromUSubstitution);
            implReset();
+            writeBOM = true;
        }

-        private final static int NEED_TO_WRITE_BOM = 1;
-        
        protected void implReset() {
            super.implReset();
            fromUnicodeStatus = NEED_TO_WRITE_BOM;
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF32BE.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF32BE.java
@ -0,0 +1,58 @@
+/**
+*******************************************************************************
+* Copyright (C) 2007, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+*******************************************************************************
+*/ 
+package com.ibm.icu.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.IntBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+class CharsetUTF32BE extends CharsetUTF32 { /* CharsetUTF32 subclass: decoder pinned to the big-endian loop, encoder resets with writeBOM off */
+    public CharsetUTF32BE(String icuCanonicalName, String javaCanonicalName, String[] aliases){
+        super(icuCanonicalName, javaCanonicalName, aliases);
+    }
+    class CharsetDecoderUTF32BE extends CharsetDecoderUTF32{
+        /* Always decodes with decodeLoopUTF32BE, whatever mode the inherited decodeLoop has selected. */
+        public CharsetDecoderUTF32BE(CharsetICU cs) {
+            super(cs);
+        }
+        protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            return decodeLoopUTF32BE(source, target, offsets, flush);
+        }
+        protected int getChar(byte[] bytes, int length){ /* assembles bytes[0..length) big-endian: the first byte is the most significant */
+            int i=0, ch=0;
+            while (i<length){
+                ch = (ch << 8) | (bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK); /* was "<< (i * 8)", which is little-endian order */
+                i++;
+            }
+            return ch;
+        }
+    }
+    public CharsetDecoder newDecoder() {
+        return new CharsetDecoderUTF32BE(this);
+    }
+    class CharsetEncoderUTF32BE extends CharsetEncoderUTF32{
+        /* Same encoder as CharsetEncoderUTF32 except for the state that implReset() leaves behind. */
+        public CharsetEncoderUTF32BE(CharsetICU cs) {
+            super(cs);
+            implReset(); /* NOTE(review): overridable method called from a constructor; it resolves to the override below */
+        }
+        /* After the inherited reset, clears fromUnicodeStatus and writeBOM - presumably so that no BOM is emitted (confirm in CharsetEncoderUTF32). */
+        protected void implReset() {
+            super.implReset();
+            fromUnicodeStatus = 0;
+            writeBOM = false;
+        }
+    }
+    public CharsetEncoder newEncoder() {
+        return new CharsetEncoderUTF32BE(this);
+    }
+}
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF32LE.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF32LE.java
@ -19,7 +19,7 @@ import com.ibm.icu.text.UTF16;
 /**
 * @author Niti Hantaweepant
 */
-class CharsetUTF32LE extends CharsetICU {
+class CharsetUTF32LE extends CharsetUTF32 {
    
    protected byte[] fromUSubstitution = new byte[]{(byte)0xfd, (byte)0xff, (byte)0, (byte)0};
    
@ -29,142 +29,34 @@ class CharsetUTF32LE extends CharsetICU {
        minBytesPerChar = 4;
        maxCharsPerByte = 1;
    }
-    class CharsetDecoderUTF32LE extends CharsetDecoderICU{
-
+    class CharsetDecoderUTF32LE extends CharsetDecoderUTF32{
+        
        public CharsetDecoderUTF32LE(CharsetICU cs) {
            super(cs);
        }
-
-        protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
-            CoderResult cr = CoderResult.UNDERFLOW;
-            
-            int sourceArrayIndex = source.position();
-            int ch, i;
-
-            donefornow:
-            {                    
-                /* UTF-8 returns here for only non-offset, this needs to change.*/
-                if (toUnicodeStatus != 0 && target.hasRemaining()) {
-                    i = toULength;       /* restore # of bytes consumed */
-            
-                    ch = (int)(toUnicodeStatus - 1);/*Stores the previously calculated ch from a previous call*/
-                    toUnicodeStatus = 0;
-                    toULength=0;
-                    
-                    while (i < 4) {
-                        if (sourceArrayIndex < source.limit()) {
-                            ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
-                            toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
-                        }
-                        else {
-                            /* stores a partially calculated target*/
-                            /* + 1 to make 0 a valid character */
-                            toUnicodeStatus = ch + 1;
-                            toULength = (byte) i;
-                            break donefornow;
-                        }
-                    }
-            
-                    if (ch <= UConverterConstants.MAXIMUM_UTF && !isSurrogate(ch)) {
-                        /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
-                        if (ch <= UConverterConstants.MAXIMUM_UCS2) 
-                        {
-                            /* fits in 16 bits */
-                            target.put((char)ch);
-                        }
-                        else {
-                            /* write out the surrogates */
-                            target.put(UTF16.getLeadSurrogate(ch));
-                            ch = UTF16.getTrailSurrogate(ch);
-                            if (target.hasRemaining()) {
-                                target.put((char)ch);
-                            }
-                            else {
-                                /* Put in overflow buffer (not handled here) */
-                                charErrorBufferArray[0] = (char) ch;
-                                charErrorBufferLength = 1;
-                                cr = CoderResult.OVERFLOW;
-                            }
-                        }
-                    }
-                    else {
-                        toULength = (byte)i;
-                        cr = CoderResult.malformedForLength(sourceArrayIndex);
-                        break donefornow;
-                    }
-                }
-                
-                while (sourceArrayIndex < source.limit() && target.hasRemaining()) {
-                    i = 0;
-                    ch = 0;
-            
-                    while (i < 4) {
-                        if (sourceArrayIndex < source.limit()) {
-                            ch |= (source.get(sourceArrayIndex) & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
-                            toUBytesArray[i++] = (byte) source.get(sourceArrayIndex++);
-                        }
-                        else {
-                            /* stores a partially calculated target*/
-                            /* + 1 to make 0 a valid character */
-                            toUnicodeStatus = ch + 1;
-                            toULength = (byte) i;
-                            break donefornow;
-                        }
-                    }
-            
-                    if (ch <= UConverterSharedData.MAXIMUM_UTF && !isSurrogate(ch)) {
-                        /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */
-                        if (ch <= UConverterSharedData.MAXIMUM_UCS2) 
-                        {
-                            /* fits in 16 bits */
-                            target.put((char) ch);
-                        }
-                        else {
-                            /* write out the surrogates */
-                            target.put(UTF16.getLeadSurrogate(ch));
-                            ch = UTF16.getTrailSurrogate(ch);
-                            if (target.hasRemaining()) {
-                                target.put((char)ch);
-                            }
-                            else {
-                                /* Put in overflow buffer (not handled here) */
-                                charErrorBufferArray[0] = (char) ch;
-                                charErrorBufferLength = 1;
-                                cr = CoderResult.OVERFLOW;                                    
-                                break;
-                            }
-                        }
-                    }
-                    else {
-                        toULength = (byte)i;
-                        cr = CoderResult.malformedForLength(sourceArrayIndex);
-                        break;
-                    }
-                }
+        protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
+            return decodeLoopUTF32LE(source, target, offsets, flush);
+        }
+        protected int getChar(byte[] bytes, int length){
+            int i=0;
+            int ch=0;
+            while(i<length){
+                ch |= (bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << (i * 8);
+                i++;
            }
-            
-            if (sourceArrayIndex < source.limit() && !target.hasRemaining()) {
-                /* End of target buffer */
-                cr = CoderResult.OVERFLOW;
-            }                    
-            
-            source.position(sourceArrayIndex);
-            return cr;
-        }        
+            return ch;
+        }
    }
-    
    class CharsetEncoderUTF32LE extends CharsetEncoderICU{

        public CharsetEncoderUTF32LE(CharsetICU cs) {
            super(cs, fromUSubstitution);
            implReset();
        }
-
-        private final static int NEED_TO_WRITE_BOM = 1;
        
        protected void implReset() {
            super.implReset();
-            fromUnicodeStatus = NEED_TO_WRITE_BOM;
+            fromUnicodeStatus = 0;
        }
        
        protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){
--- a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
@ -47,6 +47,7 @@ public class TestCharset extends TestFmwk {
            (byte) 0x00,(byte) 0x0d,
            (byte) 0x00,(byte) 0x0a };
    static final byte[] expectedByteStr ={
+        (byte) 0xfe,(byte) 0xff,
        (byte) 0x00,(byte) 'a',
        (byte) 0x00,(byte) 'b',
        (byte) 0x00,(byte) 'c',
@ -76,7 +77,7 @@ public class TestCharset extends TestFmwk {
    }
    public void TestUTF16Converter(){
        CharsetProvider icu = new CharsetProviderICU();
-        Charset cs1 = icu.charsetForName("UTF-16");
+        Charset cs1 = icu.charsetForName("UTF-16BE");
        CharsetEncoder e1 = cs1.newEncoder();
        CharsetDecoder d1 = cs1.newDecoder();
        
@ -168,7 +169,7 @@ public class TestCharset extends TestFmwk {
    }
    public void TestUTF32Converter(){
        CharsetProvider icu = new CharsetProviderICU();
-        Charset cs1 = icu.charsetForName("UTF-32");
+        Charset cs1 = icu.charsetForName("UTF-32BE");
        CharsetEncoder e1 = cs1.newEncoder();
        CharsetDecoder d1 = cs1.newDecoder();
        
@ -176,7 +177,7 @@ public class TestCharset extends TestFmwk {
        CharsetEncoder e2 = cs2.newEncoder();
        CharsetDecoder d2 = cs2.newDecoder();
        
-        for(int i=0x1d827; i<0x10FFFF; i+=0xFF){
+        for(int i=0x000; i<0x10FFFF; i+=0xFF){
            CharBuffer us = CharBuffer.allocate(0xFF*2);
            ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8);
            ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8);
@ -868,12 +869,12 @@ public class TestCharset extends TestFmwk {
        CharBuffer inBuf = CharBuffer.allocate(in.length);
        inBuf.put(in);
        CharsetEncoder encoder = cs.newEncoder();
-        ByteBuffer outBuf = ByteBuffer.allocate(in.length*2);
+        ByteBuffer outBuf = ByteBuffer.allocate(in.length*2+2);
        inBuf.rewind();
        encoder.encode(inBuf, outBuf, true);
        outBuf.rewind();
-        if(outBuf.remaining()> in.length*2){
-            errln("The UTF16 encoder appended bom. Length returned: " + outBuf.remaining());
+        if(outBuf.get(0)!= (byte)0xFE && outBuf.get(1)!= (byte)0xFF){
+            errln("The UTF16 encoder did not appended bom. Length returned: " + outBuf.remaining());
        }
        while(outBuf.hasRemaining()){
            logln("0x"+hex(outBuf.get()));
@ -881,7 +882,19 @@ public class TestCharset extends TestFmwk {
        CharsetDecoder decoder = cs.newDecoder();
        outBuf.rewind();
        CharBuffer rt = CharBuffer.allocate(in.length);
-        decoder.decode(outBuf, rt, true);
+        CoderResult cr = decoder.decode(outBuf, rt, true);
+        if(cr.isError()){
+            errln("Decoding with BOM failed. Error: "+ cr.toString());
+        }
+        equals(rt, in);
+        {
+            rt.clear();
+            outBuf.rewind();
+            Charset utf16 = Charset.forName("UTF-16");
+            CharsetDecoder dc = utf16.newDecoder();
+            cr = dc.decode(outBuf, rt, true);
+            equals(rt, in);
+        }
    }
     
    private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target) {
@ -1531,4 +1544,49 @@ public class TestCharset extends TestFmwk {
        }
        return null;
    }
+    /** Checks that the ICU "UTF-32" encoder starts its output with the big-endian BOM (00 00 FE FF) and that the bytes decode back to the input. */
+    public void TestUTF32BOM(){
+        Charset cs = (new CharsetProviderICU()).charsetForName("UTF-32");
+        char[] in = new char[] { 0xd800, 0xdc00, 
+                                 0xd801, 0xdc01,
+                                 0xdbff, 0xdfff, 
+                                 0xd900, 0xdd00, 
+                                 0x0000, 0x0041,
+                                 0x0000, 0x0042,
+                                 0x0000, 0x0043};
+        
+        CharBuffer inBuf = CharBuffer.allocate(in.length);
+        inBuf.put(in);
+        CharsetEncoder encoder = cs.newEncoder();
+        ByteBuffer outBuf = ByteBuffer.allocate(in.length*4+4);
+        inBuf.rewind();
+        encoder.encode(inBuf, outBuf, true);
+        outBuf.flip(); // limit = end of the encoded bytes, so the unused zero padding is never logged or decoded
+        if(outBuf.remaining()<4 || outBuf.get(0)!= (byte)0x00 || outBuf.get(1)!= (byte)0x00 || 
+                outBuf.get(2)!= (byte)0xFE || outBuf.get(3)!= (byte)0xFF){
+            errln("The UTF32 encoder did not append bom. Length returned: " + outBuf.remaining());
+        }
+        while(outBuf.hasRemaining()){
+            logln("0x"+hex(outBuf.get()));
+        }
+        CharsetDecoder decoder = cs.newDecoder();
+        // rewind() only moves the position back to 0; the limit set by flip() above is kept
+        outBuf.rewind();
+        CharBuffer rt = CharBuffer.allocate(in.length);
+        CoderResult cr = decoder.decode(outBuf, rt, true);
+        if(cr.isError()){
+            errln("Decoding with BOM failed. Error: "+ cr.toString());
+        }
+        equals(rt, in);
+        try{
+            rt.clear();
+            outBuf.rewind();
+            Charset utf32 = Charset.forName("UTF-32");
+            CharsetDecoder dc = utf32.newDecoder();
+            cr = dc.decode(outBuf, rt, true);
+            equals(rt, in);
+        }catch(UnsupportedCharsetException ex){
+            // deliberately ignored: Charset.forName throws this when the running JDK has no UTF-32 charset
+        }
+    }
 }