ICU-8038 Implement UnicodeLittle and UnicodeBig in ICU4J.

X-SVN-Rev: 28818
This commit is contained in:
Michael Ow 2010-10-13 20:21:16 +00:00
parent 5dac211295
commit 99102b01d2
2 changed files with 37 additions and 4 deletions

View File

@ -125,7 +125,9 @@ public abstract class CharsetICU extends Charset{
algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.charset.Charset88591" );
algorithmicCharsets.put("UTF-16", "com.ibm.icu.charset.CharsetUTF16" );
algorithmicCharsets.put("UTF-16BE", "com.ibm.icu.charset.CharsetUTF16BE" );
algorithmicCharsets.put("UTF-16BE,version=1", "com.ibm.icu.charset.CharsetUTF16BE" );
algorithmicCharsets.put("UTF-16LE", "com.ibm.icu.charset.CharsetUTF16LE" );
algorithmicCharsets.put("UTF-16LE,version=1", "com.ibm.icu.charset.CharsetUTF16LE" );
algorithmicCharsets.put("UTF16_OppositeEndian", "com.ibm.icu.charset.CharsetUTF16LE" );
algorithmicCharsets.put("UTF16_PlatformEndian", "com.ibm.icu.charset.CharsetUTF16" );
algorithmicCharsets.put("UTF-32", "com.ibm.icu.charset.CharsetUTF32" );

View File

@ -35,10 +35,20 @@ class CharsetUTF16 extends CharsetICU {
private int endianXOR;
private byte[] bom;
private byte[] fromUSubstitution;
private int version;
public CharsetUTF16(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
super(icuCanonicalName, javaCanonicalName, aliases);
/* Get the version number (e.g. UTF-16LE,version=1) */
int versionIndex = icuCanonicalName.indexOf("version=");
if (versionIndex > 0) {
version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();
} else {
version = 0;
}
this.isEndianSpecified = (this instanceof CharsetUTF16BE || this instanceof CharsetUTF16LE);
this.isBigEndian = !(this instanceof CharsetUTF16LE);
@ -98,10 +108,22 @@ class CharsetUTF16 extends CharsetICU {
actualEndianXOR = ENDIAN_XOR_LE;
} else {
// we do not have a BOM (and we have toULength==1 bytes)
actualBOM = null;
actualEndianXOR = endianXOR;
if (isEndianSpecified && version == 1) {
actualBOM = isBigEndian ? CharsetUTF16.BOM_BE : CharsetUTF16.BOM_LE;
actualEndianXOR = isBigEndian ? CharsetUTF16.ENDIAN_XOR_BE : CharsetUTF16.ENDIAN_XOR_LE;
} else {
actualBOM = null;
actualEndianXOR = endianXOR;
}
break;
}
} else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 2] && toUBytesArray[toULength - 2] == actualBOM[toULength - 1])) {
return CoderResult.malformedForLength(2);
} else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 1] && toUBytesArray[toULength - 2] == actualBOM[toULength - 2])) {
// we found a BOM! at last!
// too bad we have to get ignore it now (like it was unwanted or something)
toULength = 0;
break;
} else if (isEndianSpecified || toUBytesArray[toULength - 1] != actualBOM[toULength - 1]) {
// we do not have a BOM (and we have toULength bytes)
actualBOM = null;
@ -135,6 +157,15 @@ class CharsetUTF16 extends CharsetICU {
return CoderResult.UNDERFLOW;
toUBytesArray[toULength++] = source.get();
}
if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 2] && toUBytesArray[toULength - 2] == actualBOM[toULength - 1])) {
return CoderResult.malformedForLength(2);
} else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 1] && toUBytesArray[toULength - 2] == actualBOM[toULength - 2])) {
// we found a BOM! at last!
// too bad we have to get ignore it now (like it was unwanted or something)
toULength = 0;
continue;
}
if (!target.hasRemaining())
return CoderResult.OVERFLOW;
@ -202,12 +233,12 @@ class CharsetUTF16 extends CharsetICU {
// Encoder constructor: hands the charset and fromUSubstitution to the superclass
// constructor, then sets the initial value of fromUnicodeStatus.
public CharsetEncoderUTF16(CharsetICU cs) {
super(cs, fromUSubstitution);
// NOTE(review): this page appears to be a diff rendered without +/- markers, so the
// next line is presumably the removed (pre-change) assignment — confirm against the repository.
fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM;
// This assignment overwrites the one above: fromUnicodeStatus becomes 0 only when
// isEndianSpecified is true and version != 1; otherwise it is NEED_TO_WRITE_BOM.
fromUnicodeStatus = (isEndianSpecified && version != 1) ? 0 : NEED_TO_WRITE_BOM;
}
// Resets the encoder: delegates to super.implReset(), then re-initializes
// fromUnicodeStatus with the same expressions the constructor uses.
protected void implReset() {
super.implReset();
// NOTE(review): this page appears to be a diff rendered without +/- markers, so the
// next line is presumably the removed (pre-change) assignment — confirm against the repository.
fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM;
// This assignment overwrites the one above: fromUnicodeStatus becomes 0 only when
// isEndianSpecified is true and version != 1; otherwise it is NEED_TO_WRITE_BOM.
fromUnicodeStatus = (isEndianSpecified && version != 1) ? 0 : NEED_TO_WRITE_BOM;
}
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {