ICU-8038 Implement UnicodeLittle and UnicodeBig in ICU4J.
X-SVN-Rev: 28818
This commit is contained in:
parent
5dac211295
commit
99102b01d2
@ -125,7 +125,9 @@ public abstract class CharsetICU extends Charset{
|
||||
algorithmicCharsets.put("ISO-8859-1", "com.ibm.icu.charset.Charset88591" );
|
||||
algorithmicCharsets.put("UTF-16", "com.ibm.icu.charset.CharsetUTF16" );
|
||||
algorithmicCharsets.put("UTF-16BE", "com.ibm.icu.charset.CharsetUTF16BE" );
|
||||
algorithmicCharsets.put("UTF-16BE,version=1", "com.ibm.icu.charset.CharsetUTF16BE" );
|
||||
algorithmicCharsets.put("UTF-16LE", "com.ibm.icu.charset.CharsetUTF16LE" );
|
||||
algorithmicCharsets.put("UTF-16LE,version=1", "com.ibm.icu.charset.CharsetUTF16LE" );
|
||||
algorithmicCharsets.put("UTF16_OppositeEndian", "com.ibm.icu.charset.CharsetUTF16LE" );
|
||||
algorithmicCharsets.put("UTF16_PlatformEndian", "com.ibm.icu.charset.CharsetUTF16" );
|
||||
algorithmicCharsets.put("UTF-32", "com.ibm.icu.charset.CharsetUTF32" );
|
||||
|
@ -35,10 +35,20 @@ class CharsetUTF16 extends CharsetICU {
|
||||
private int endianXOR;
|
||||
private byte[] bom;
|
||||
private byte[] fromUSubstitution;
|
||||
|
||||
private int version;
|
||||
|
||||
public CharsetUTF16(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
|
||||
super(icuCanonicalName, javaCanonicalName, aliases);
|
||||
|
||||
/* Get the version number (e.g. UTF-16LE,version=1) */
|
||||
int versionIndex = icuCanonicalName.indexOf("version=");
|
||||
if (versionIndex > 0) {
|
||||
version = Integer.decode(icuCanonicalName.substring(versionIndex+8, versionIndex+9)).intValue();
|
||||
} else {
|
||||
version = 0;
|
||||
}
|
||||
|
||||
this.isEndianSpecified = (this instanceof CharsetUTF16BE || this instanceof CharsetUTF16LE);
|
||||
this.isBigEndian = !(this instanceof CharsetUTF16LE);
|
||||
|
||||
@ -98,10 +108,22 @@ class CharsetUTF16 extends CharsetICU {
|
||||
actualEndianXOR = ENDIAN_XOR_LE;
|
||||
} else {
|
||||
// we do not have a BOM (and we have toULength==1 bytes)
|
||||
actualBOM = null;
|
||||
actualEndianXOR = endianXOR;
|
||||
if (isEndianSpecified && version == 1) {
|
||||
actualBOM = isBigEndian ? CharsetUTF16.BOM_BE : CharsetUTF16.BOM_LE;
|
||||
actualEndianXOR = isBigEndian ? CharsetUTF16.ENDIAN_XOR_BE : CharsetUTF16.ENDIAN_XOR_LE;
|
||||
} else {
|
||||
actualBOM = null;
|
||||
actualEndianXOR = endianXOR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 2] && toUBytesArray[toULength - 2] == actualBOM[toULength - 1])) {
|
||||
return CoderResult.malformedForLength(2);
|
||||
} else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 1] && toUBytesArray[toULength - 2] == actualBOM[toULength - 2])) {
|
||||
// we found a BOM! at last!
|
||||
// too bad we have to get ignore it now (like it was unwanted or something)
|
||||
toULength = 0;
|
||||
break;
|
||||
} else if (isEndianSpecified || toUBytesArray[toULength - 1] != actualBOM[toULength - 1]) {
|
||||
// we do not have a BOM (and we have toULength bytes)
|
||||
actualBOM = null;
|
||||
@ -135,6 +157,15 @@ class CharsetUTF16 extends CharsetICU {
|
||||
return CoderResult.UNDERFLOW;
|
||||
toUBytesArray[toULength++] = source.get();
|
||||
}
|
||||
|
||||
if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 2] && toUBytesArray[toULength - 2] == actualBOM[toULength - 1])) {
|
||||
return CoderResult.malformedForLength(2);
|
||||
} else if (isEndianSpecified && version == 1 && (toUBytesArray[toULength - 1] == actualBOM[toULength - 1] && toUBytesArray[toULength - 2] == actualBOM[toULength - 2])) {
|
||||
// we found a BOM! at last!
|
||||
// too bad we have to get ignore it now (like it was unwanted or something)
|
||||
toULength = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!target.hasRemaining())
|
||||
return CoderResult.OVERFLOW;
|
||||
@ -202,12 +233,12 @@ class CharsetUTF16 extends CharsetICU {
|
||||
|
||||
public CharsetEncoderUTF16(CharsetICU cs) {
|
||||
super(cs, fromUSubstitution);
|
||||
fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM;
|
||||
fromUnicodeStatus = (isEndianSpecified && version != 1) ? 0 : NEED_TO_WRITE_BOM;
|
||||
}
|
||||
|
||||
protected void implReset() {
|
||||
super.implReset();
|
||||
fromUnicodeStatus = isEndianSpecified ? 0 : NEED_TO_WRITE_BOM;
|
||||
fromUnicodeStatus = (isEndianSpecified && version != 1) ? 0 : NEED_TO_WRITE_BOM;
|
||||
}
|
||||
|
||||
protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) {
|
||||
|
Loading…
Reference in New Issue
Block a user