ICU-5768 Store the decoder mode in the constructors of the UTF-32LE and UTF-32BE decoders so that the correct endianness is used. Also added a test for the UTF-32 charset that exercises BOM detection.

X-SVN-Rev: 21922
This commit is contained in:
Michael Ow 2007-07-06 21:32:51 +00:00
parent 2563257241
commit ccc581a96c
4 changed files with 108 additions and 11 deletions

View File

@ -57,17 +57,17 @@ class CharsetUTF32 extends CharsetICU {
toUBytesArray[toULength++] = source.get(pos++);
}
if(toULength==SIGNATURE_LENGTH){
if(toUBytesArray[0]==0x00 && toUBytesArray[1]==0x00 && toUBytesArray[2]==0xFE && toUBytesArray[3]==0xFF){
if(toUBytesArray[0]==(byte)0x00 && toUBytesArray[1]==(byte)0x00 && toUBytesArray[2]==(byte)0xFE && toUBytesArray[3]==(byte)0xFF){
// may be BE
state = 1;
offsetDelta=4;
}else if(toUBytesArray[0]==0xFF && toUBytesArray[1]==0xFE && toUBytesArray[2]==0x00 && toUBytesArray[3]==0x00){
}else if(toUBytesArray[0]==(byte)0xFF && toUBytesArray[1]==(byte)0xFE && toUBytesArray[2]==(byte)0x00 && toUBytesArray[3]==(byte)0x00){
//may be LE
state = 2;
offsetDelta=4;
}else{
//default to the subclass charset
state = 3;
//state = 3;
toUnicodeStatus = getChar(toUBytesArray, toULength)+1;
}
isFirstBuffer = false;
@ -83,11 +83,12 @@ class CharsetUTF32 extends CharsetICU {
}
}
mode=state;
source.position(pos);
if(!cr.isError() && source.hasRemaining()){
cr = decodeLoopImpl(source, target, offsets, flush);
}
mode=state;
return cr;
}
protected int getChar(byte[] bytes, int length){
@ -97,14 +98,15 @@ class CharsetUTF32 extends CharsetICU {
CoderResult cr = CoderResult.UNDERFLOW;
if(mode==1){
/* call UTF-16BE */
/* call UTF-32BE */
cr = decodeLoopUTF32BE(source, target, offsets, flush);
}else if(mode==2){
/* call UTF-16LE */
/* call UTF-32LE */
cr =decodeLoopUTF32LE(source, target, offsets, flush);
}else{
/* should not occur */
cr = decodeLoopUTF32BE(source, target, offsets, flush);
//cr = decodeLoopUTF32BE(source, target, offsets, flush);
cr = CoderResult.malformedForLength(source.position());
}
return cr;
}

View File

@ -23,6 +23,7 @@ class CharsetUTF32BE extends CharsetUTF32 {
/* Creates the UTF-32BE decoder for the given charset and records its fixed decoding mode. */
public CharsetDecoderUTF32BE(CharsetICU cs) {
super(cs);
// 1 is the mode value that the generic CharsetUTF32 dispatch routes to decodeLoopUTF32BE.
mode=1;
}
protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
return decodeLoopUTF32BE(source, target, offsets, flush);

View File

@ -33,6 +33,7 @@ class CharsetUTF32LE extends CharsetUTF32 {
/* Creates the UTF-32LE decoder for the given charset and records its fixed decoding mode. */
public CharsetDecoderUTF32LE(CharsetICU cs) {
super(cs);
// 2 is the mode value that the generic CharsetUTF32 dispatch routes to decodeLoopUTF32LE.
mode=2;
}
protected CoderResult decodeLoopImpl(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush){
return decodeLoopUTF32LE(source, target, offsets, flush);

View File

@ -222,12 +222,12 @@ public class TestCharset extends TestFmwk {
ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity());
/*
newBS.put((byte)0x00);
newBS.put((byte)0x00);
newBS.put((byte)0xFE);
newBS.put((byte)0xFF);
*/
newBS.put(bs1);
bs1.position(0);
smBufDecode(d1, "UTF-32", bs1, us);
@ -235,12 +235,12 @@ public class TestCharset extends TestFmwk {
newBS.clear();
/*
newBS.put((byte)0xFF);
newBS.put((byte)0xFE);
newBS.put((byte)0x00);
newBS.put((byte)0x00);
*/
newBS.put(bs2);
bs2.position(0);
smBufDecode(d2, "UTF-32LE", bs2, us);
@ -3218,4 +3218,97 @@ public class TestCharset extends TestFmwk {
errln("Error while encoding UTF-16LE (3) should have occured.");
}
}
/**
 * Provides better code coverage for the generic "UTF-32" charset decoder.
 *
 * Four inputs are pushed through {@code smBufDecode} with its last two
 * arguments set to {@code true, false}; each call is expected to end in an
 * exception. When a call returns normally the case is reported via
 * {@code errln}.
 */
public void TestCharsetUTF32() {
    CharsetProvider provider = new CharsetProviderICU();
    Charset cs = provider.charsetForName("UTF-32");
    CharsetDecoder decoder = cs.newDecoder();

    // Target contents handed to smBufDecode: four NUL chars.
    char[] usArray = {
        0x0000, 0x0000, 0x0000, 0x0000,
    };
    // Starts with 00 00 FE FF (the signature the decoder checks for "may be BE"),
    // followed by 00 00 04 43 and then two groups in the opposite byte order.
    byte[] bsArray1 = {
        (byte)0x00, (byte)0x00, (byte)0xFE, (byte)0xFF,
        (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x43,
        (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00,
        (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00,
    };
    // Starts with FF FE 00 00 (the signature the decoder checks for "may be LE").
    byte[] bsArray2 = {
        (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00,
        (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00,
    };

    // Case 1: input beginning with the BE signature.
    CharBuffer us = CharBuffer.allocate(usArray.length);
    ByteBuffer bs = ByteBuffer.allocate(bsArray1.length);
    us.put(usArray);
    bs.put(bsArray1);
    us.flip();
    bs.flip();
    expectUTF32DecodeException(decoder, "UTF32-DE-1", bs, us,
            "Malform exception while decoding UTF32 charset (1) should have been thrown.");

    // Case 2: fresh decoder, input beginning with the LE signature.
    decoder = cs.newDecoder();
    bs = ByteBuffer.allocate(bsArray2.length);
    bs.put(bsArray2);
    us.limit(usArray.length);
    us.position(0);
    bs.flip();
    expectUTF32DecodeException(decoder, "UTF32-DE-2", bs, us,
            "Malform exception while decoding UTF32 charset (2) should have been thrown.");

    //Test malform exception
    // Five bytes: not a whole number of 4-byte units.
    bs.clear();
    us.clear();
    bs.put(new byte[] {(byte)0x00, (byte)0xFE, (byte)0xFF, (byte)0x00, (byte)0x00});
    us.put((char)0x0000);
    us.flip();
    bs.flip();
    expectUTF32DecodeException(decoder, "UTF32-DE-3", bs, us,
            "Malform exception while decoding UTF32 charset (3) should have been thrown.");

    //Test BOM testing
    // 00 00 FF FE matches neither of the two signatures the decoder checks.
    bs.clear();
    us.clear();
    bs.put(new byte[] {(byte)0x00, (byte)0x00, (byte)0xFF, (byte)0xFE});
    us.put((char)0x0000);
    us.flip();
    bs.flip();
    expectUTF32DecodeException(decoder, "UTF32-DE-4", bs, us,
            "Malform exception while decoding UTF32 charset (4) should have been thrown.");
}

/**
 * Runs {@code smBufDecode(decoder, label, bs, us, true, false)} and reports
 * {@code failureMessage} through {@code errln} when no exception is raised.
 *
 * The report is issued outside the try block on purpose: should
 * {@code errln} signal the failure by throwing, a catch-all around it
 * would silently discard the report.
 */
private void expectUTF32DecodeException(CharsetDecoder decoder, String label,
        ByteBuffer bs, CharBuffer us, String failureMessage) {
    boolean threw = false;
    try {
        smBufDecode(decoder, label, bs, us, true, false);
    } catch (Exception expected) {
        // Intentionally ignored: an exception is the outcome this test wants.
        threw = true;
    }
    if (!threw) {
        errln(failureMessage);
    }
}
}