ICU-5768 Store the decoding mode in the constructors of the UTF-32LE and UTF-32BE decoders so that the correct endianness is used. Also added a test for the UTF-32 charset that exercises BOM detection.

X-SVN-Rev: 21922
2007-07-06 21:32:51 +00:00 · 2007-07-06 21:32:51 +00:00 · ccc581a96c
commit ccc581a96c
parent 2563257241
4 changed files with 108 additions and 11 deletions
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF32.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF32.java
@ -57,17 +57,17 @@ class CharsetUTF32 extends CharsetICU {
                    toUBytesArray[toULength++] = source.get(pos++);
                }
                if(toULength==SIGNATURE_LENGTH){
-                    if(toUBytesArray[0]==0x00 && toUBytesArray[1]==0x00 && toUBytesArray[2]==0xFE && toUBytesArray[3]==0xFF){
+                    if(toUBytesArray[0]==(byte)0x00 && toUBytesArray[1]==(byte)0x00 && toUBytesArray[2]==(byte)0xFE && toUBytesArray[3]==(byte)0xFF){
                        // may be BE
                        state = 1;
                        offsetDelta=4;
-                    }else if(toUBytesArray[0]==0xFF && toUBytesArray[1]==0xFE && toUBytesArray[2]==0x00 && toUBytesArray[3]==0x00){
+                    }else if(toUBytesArray[0]==(byte)0xFF && toUBytesArray[1]==(byte)0xFE && toUBytesArray[2]==(byte)0x00 && toUBytesArray[3]==(byte)0x00){
                        //may be LE
                        state = 2;
                        offsetDelta=4;
                    }else{
                        //default to the subclass charset
-                        state = 3;
+                        //state = 3;
                        toUnicodeStatus = getChar(toUBytesArray, toULength)+1;  
                    }
                    isFirstBuffer = false;
@ -83,11 +83,12 @@ class CharsetUTF32 extends CharsetICU {
                }
            }
            
+            mode=state;
            source.position(pos);
            if(!cr.isError() && source.hasRemaining()){
                cr = decodeLoopImpl(source, target, offsets, flush);
            }
-            mode=state;
+
            return cr;
        }
        protected int getChar(byte[] bytes, int length){
@ -97,14 +98,15 @@ class CharsetUTF32 extends CharsetICU {
            
            CoderResult cr = CoderResult.UNDERFLOW;
            if(mode==1){
-                /* call UTF-16BE */
+                /* call UTF-32BE */
                cr = decodeLoopUTF32BE(source, target, offsets, flush);
            }else if(mode==2){
-                /* call UTF-16LE */
+                /* call UTF-32LE */
                cr =decodeLoopUTF32LE(source, target, offsets, flush);
            }else{
                /* should not occur */
-                cr = decodeLoopUTF32BE(source, target, offsets, flush);
+                //cr = decodeLoopUTF32BE(source, target, offsets, flush);
+                cr = CoderResult.malformedForLength(source.position());
            }
            return cr;
        }
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF32BE.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF32BE.java
@ -23,6 +23,7 @@ class CharsetUTF32BE extends CharsetUTF32 {
        
        public CharsetDecoderUTF32BE(CharsetICU cs) {
            super(cs);
+            mode=1;
        }
        protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
            return decodeLoopUTF32BE(source, target, offsets, flush);
--- a/icu4j/src/com/ibm/icu/charset/CharsetUTF32LE.java
+++ b/icu4j/src/com/ibm/icu/charset/CharsetUTF32LE.java
@ -33,6 +33,7 @@ class CharsetUTF32LE extends CharsetUTF32 {
        
        public CharsetDecoderUTF32LE(CharsetICU cs) {
            super(cs);
+            mode=2;
        }
        protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
            return decodeLoopUTF32LE(source, target, offsets, flush);
--- a/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
+++ b/icu4j/src/com/ibm/icu/dev/test/charset/TestCharset.java
@ -222,12 +222,12 @@ public class TestCharset extends TestFmwk {
             

            ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity());
-            /*
+            
            newBS.put((byte)0x00);
            newBS.put((byte)0x00);
            newBS.put((byte)0xFE);
            newBS.put((byte)0xFF);
-            */
+            
            newBS.put(bs1);
            bs1.position(0);
            smBufDecode(d1, "UTF-32", bs1, us);
@ -235,12 +235,12 @@ public class TestCharset extends TestFmwk {
            
            
            newBS.clear();
-            /*
+            
            newBS.put((byte)0xFF);
            newBS.put((byte)0xFE);
            newBS.put((byte)0x00);
            newBS.put((byte)0x00);
-            */
+            
            newBS.put(bs2);    
            bs2.position(0);
            smBufDecode(d2, "UTF-32LE", bs2, us);
@ -3218,4 +3218,97 @@ public class TestCharset extends TestFmwk {
            errln("Error while encoding UTF-16LE (3) should have occured.");
        }          
    }
+    
+    //provide better code coverage for the generic charset UTF32
+    public void TestCharsetUTF32() {
+        CoderResult result = CoderResult.UNDERFLOW;
+        CharsetProvider provider = new CharsetProviderICU();
+        Charset cs = provider.charsetForName("UTF-32");        
+        CharsetDecoder decoder = cs.newDecoder();
+        
+        char us_array[] = {
+                0x0000, 0x0000, 0x0000, 0x0000,
+            };
+        
+        byte bs_array1[] = {
+                (byte)0x00, (byte)0x00, (byte)0xFE, (byte)0xFF,
+                (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x43,
+                (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00,
+                (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00,
+            };
+        
+        byte bs_array2[] = {
+                (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00,
+                (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00,
+            };
+        
+        CharBuffer us = CharBuffer.allocate(us_array.length);
+        ByteBuffer bs = ByteBuffer.allocate(bs_array1.length);
+        
+        us.put(us_array);
+        bs.put(bs_array1);
+        
+        us.limit(us.position());
+        us.position(0);
+        bs.limit(bs.position());
+        bs.position(0);
+            
+        try {
+            smBufDecode(decoder, "UTF32-DE-1", bs, us, true, false);
+            errln("Malform exception while decoding UTF32 charset (1) should have been thrown.");
+        } catch (Exception ex) {
+        }
+        
+        decoder = cs.newDecoder();
+        
+        bs = ByteBuffer.allocate(bs_array2.length);
+        bs.put(bs_array2);
+        
+        us.limit(4);
+        us.position(0);
+        bs.limit(bs.position());
+        bs.position(0);
+            
+        try {
+            smBufDecode(decoder, "UTF32-DE-2", bs, us, true, false);
+            errln("Malform exception while decoding UTF32 charset (2) should have been thrown.");
+        } catch (Exception ex) {
+        }
+        
+        //Test malform exception
+        bs.clear();
+        us.clear();
+        
+        bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); bs.put((byte)0x00); bs.put((byte)0x00);
+        us.put((char)0x0000);
+        
+        us.limit(us.position());
+        us.position(0);
+        bs.limit(bs.position());
+        bs.position(0);
+        
+        try {
+            smBufDecode(decoder, "UTF32-DE-3", bs, us, true, false);
+            errln("Malform exception while decoding UTF32 charset (3) should have been thrown.");
+        } catch (Exception ex) {
+        }
+        
+        //Test BOM testing
+        bs.clear();
+        us.clear();
+        
+        bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFF); bs.put((byte)0xFE); 
+        us.put((char)0x0000);
+        
+        us.limit(us.position());
+        us.position(0);
+        bs.limit(bs.position());
+        bs.position(0);
+        
+        try {
+            smBufDecode(decoder, "UTF32-DE-4", bs, us, true, false);
+            errln("Malform exception while decoding UTF32 charset (4) should have been thrown.");
+        } catch (Exception ex) {
+        }
+    }
 }